[ovs-dev] [PATCH 1/5] netdev-offload: Fix Clang's static analyzer 'null pointer dereference' warnings.

2024-05-16 Thread Mike Pattrick
Clang's static analyzer will complain about a null pointer dereference
because dumps can be set to null and then there is a loop where it could
have been written to.

Instead, return early from the netdev_ports_flow_dump_create function if
dumps is NULL.

Signed-off-by: Mike Pattrick 
---
 lib/netdev-offload.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
index 931d634e1..02b1cf203 100644
--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -638,7 +638,14 @@ netdev_ports_flow_dump_create(const char *dpif_type, int 
*ports, bool terse)
 }
 }
 
-dumps = count ? xzalloc(sizeof *dumps * count) : NULL;
+if (count == 0) {
+*ports = 0;
+ovs_rwlock_unlock(_to_netdev_rwlock);
+
+return NULL;
+}
+
+dumps = xzalloc(sizeof *dumps * count);
 
 HMAP_FOR_EACH (data, portno_node, _to_netdev) {
 if (netdev_get_dpif_type(data->netdev) == dpif_type) {
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v8 1/2] ofproto-dpif-mirror: Reduce number of function parameters.

2024-04-29 Thread Mike Pattrick
Previously the mirror_set() and mirror_get() functions took a large
number of parameters, which was inefficient and difficult to read and
extend. This patch moves most of the parameters into a struct.

Signed-off-by: Mike Pattrick 
Acked-by: Simon Horman 
Acked-by: Eelco Chaudron 
Signed-off-by: Mike Pattrick 
---
 ofproto/ofproto-dpif-mirror.c | 60 ++-
 ofproto/ofproto-dpif-mirror.h | 40 ++-
 ofproto/ofproto-dpif-xlate.c  | 29 -
 ofproto/ofproto-dpif.c| 23 +++---
 4 files changed, 88 insertions(+), 64 deletions(-)

diff --git a/ofproto/ofproto-dpif-mirror.c b/ofproto/ofproto-dpif-mirror.c
index 343b75f0e..4967ecc9a 100644
--- a/ofproto/ofproto-dpif-mirror.c
+++ b/ofproto/ofproto-dpif-mirror.c
@@ -207,19 +207,22 @@ mirror_bundle_dst(struct mbridge *mbridge, struct 
ofbundle *ofbundle)
 }
 
 int
-mirror_set(struct mbridge *mbridge, void *aux, const char *name,
-   struct ofbundle **srcs, size_t n_srcs,
-   struct ofbundle **dsts, size_t n_dsts,
-   unsigned long *src_vlans, struct ofbundle *out_bundle,
-   uint16_t snaplen,
-   uint16_t out_vlan)
+mirror_set(struct mbridge *mbridge, void *aux,
+   const struct ofproto_mirror_settings *ms,
+   const struct mirror_bundles *mb)
 {
 struct mbundle *mbundle, *out;
 mirror_mask_t mirror_bit;
 struct mirror *mirror;
 struct hmapx srcs_map;  /* Contains "struct ofbundle *"s. */
 struct hmapx dsts_map;  /* Contains "struct ofbundle *"s. */
+uint16_t out_vlan;
 
+if (!ms || !mbridge) {
+return EINVAL;
+}
+
+out_vlan = ms->out_vlan;
 mirror = mirror_lookup(mbridge, aux);
 if (!mirror) {
 int idx;
@@ -227,7 +230,7 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 idx = mirror_scan(mbridge);
 if (idx < 0) {
 VLOG_WARN("maximum of %d port mirrors reached, cannot create %s",
-  MAX_MIRRORS, name);
+  MAX_MIRRORS, ms->name);
 return EFBIG;
 }
 
@@ -242,8 +245,8 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 unsigned long *vlans = ovsrcu_get(unsigned long *, >vlans);
 
 /* Get the new configuration. */
-if (out_bundle) {
-out = mbundle_lookup(mbridge, out_bundle);
+if (mb->out_bundle) {
+out = mbundle_lookup(mbridge, mb->out_bundle);
 if (!out) {
 mirror_destroy(mbridge, mirror->aux);
 return EINVAL;
@@ -252,16 +255,16 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 } else {
 out = NULL;
 }
-mbundle_lookup_multiple(mbridge, srcs, n_srcs, _map);
-mbundle_lookup_multiple(mbridge, dsts, n_dsts, _map);
+mbundle_lookup_multiple(mbridge, mb->srcs, mb->n_srcs, _map);
+mbundle_lookup_multiple(mbridge, mb->dsts, mb->n_dsts, _map);
 
 /* If the configuration has not changed, do nothing. */
 if (hmapx_equals(_map, >srcs)
 && hmapx_equals(_map, >dsts)
-&& vlan_bitmap_equal(vlans, src_vlans)
+&& vlan_bitmap_equal(vlans, ms->src_vlans)
 && mirror->out == out
 && mirror->out_vlan == out_vlan
-&& mirror->snaplen == snaplen)
+&& mirror->snaplen == ms->snaplen)
 {
 hmapx_destroy(_map);
 hmapx_destroy(_map);
@@ -275,15 +278,15 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 hmapx_swap(_map, >dsts);
 hmapx_destroy(_map);
 
-if (vlans || src_vlans) {
+if (vlans || ms->src_vlans) {
 ovsrcu_postpone(free, vlans);
-vlans = vlan_bitmap_clone(src_vlans);
+vlans = vlan_bitmap_clone(ms->src_vlans);
 ovsrcu_set(>vlans, vlans);
 }
 
 mirror->out = out;
 mirror->out_vlan = out_vlan;
-mirror->snaplen = snaplen;
+mirror->snaplen = ms->snaplen;
 
 /* Update mbundles. */
 mirror_bit = MIRROR_MASK_C(1) << mirror->idx;
@@ -406,23 +409,22 @@ mirror_update_stats(struct mbridge *mbridge, 
mirror_mask_t mirrors,
 /* Retrieves the mirror numbered 'index' in 'mbridge'.  Returns true if such a
  * mirror exists, false otherwise.
  *
- * If successful, '*vlans' receives the mirror's VLAN membership information,
+ * If successful 'mc->vlans' receives the mirror's VLAN membership information,
  * either a null pointer if the mirror includes all VLANs or a 4096-bit bitmap
  * in which a 1-bit indicates that the mirror includes a particular VLAN,
- * '*dup_mirrors' receives a bitmap of mirrors whose output duplicates mirror
- * 'index', '*out' receives the output ofbundle (if any), and '*out_vlan'
- * receives the output VLAN (if any).
+ * 'mc->dup_mirrors' receives a bitmap of mirrors whose output duplic

[ovs-dev] [PATCH v8 2/2] ofproto-dpif-mirror: Add support for pre-selection filter.

2024-04-29 Thread Mike Pattrick
Currently a bridge mirror will collect all packets and tools like
ovs-tcpdump can apply additional filters after they have already been
duplicated by vswitchd. This can result in inefficient collection.

This patch adds support to apply pre-selection to bridge mirrors, which
can limit which packets are mirrored based on flow metadata. This
significantly improves overall vswitchd performance during mirroring if
only a subset of traffic is required.

Signed-off-by: Mike Pattrick 
---
v8:
 - Corrected code from v7 related to sequence and in_port. Mirrors
   reject filters with an in_port set as this could cause confusion.
 - Combined ovsrcu pointers into a new struct, minimatch wasn't used
   because the minimatch_* functions didn't fit the usage here.
 - Added a test to check for modifying filters when partially
   overlapping flows already exist.
 - Corrected documentation.
---
 Documentation/ref/ovs-tcpdump.8.rst |   8 +-
 NEWS|   6 +
 lib/flow.h  |   9 ++
 ofproto/ofproto-dpif-mirror.c   | 101 -
 ofproto/ofproto-dpif-mirror.h   |   9 +-
 ofproto/ofproto-dpif-xlate.c|  15 ++-
 ofproto/ofproto-dpif.c  |  12 +-
 ofproto/ofproto.h   |   3 +
 tests/ofproto-dpif.at   | 165 
 utilities/ovs-tcpdump.in|  13 ++-
 vswitchd/bridge.c   |  13 ++-
 vswitchd/vswitch.ovsschema  |   7 +-
 vswitchd/vswitch.xml|  16 +++
 13 files changed, 362 insertions(+), 15 deletions(-)

diff --git a/Documentation/ref/ovs-tcpdump.8.rst 
b/Documentation/ref/ovs-tcpdump.8.rst
index b9f8cdf6f..e21e61211 100644
--- a/Documentation/ref/ovs-tcpdump.8.rst
+++ b/Documentation/ref/ovs-tcpdump.8.rst
@@ -61,8 +61,14 @@ Options
 
   If specified, mirror all ports (optional).
 
+* ``--filter ``
+
+  If specified, only mirror flows that match the provided OpenFlow filter.
+  The available fields are documented in ``ovs-fields(7)``.
+
 See Also
 
 
 ``ovs-appctl(8)``, ``ovs-vswitchd(8)``, ``ovs-pcap(1)``,
-``ovs-tcpundump(1)``, ``tcpdump(8)``, ``wireshark(8)``.
+``ovs-fields(7)``, ``ovs-tcpundump(1)``, ``tcpdump(8)``,
+``wireshark(8)``.
diff --git a/NEWS b/NEWS
index b92cec532..f3a4bf076 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,12 @@ Post-v3.3.0
- The primary development branch has been renamed from 'master' to 'main'.
  The OVS tree remains hosted on GitHub.
  https://github.com/openvswitch/ovs.git
+   - ovs-vsctl:
+ * Added a new filter column in the Mirror table which can be used to
+   apply filters to mirror ports.
+   - ovs-tcpdump:
+ * Added command line parameter --filter to enable filtering the flows
+   that are captured by tcpdump.
 
 
 v3.3.0 - 16 Feb 2024
diff --git a/lib/flow.h b/lib/flow.h
index 75a9be3c1..60ec4b0d7 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -939,6 +939,15 @@ flow_union_with_miniflow(struct flow *dst, const struct 
miniflow *src)
 flow_union_with_miniflow_subset(dst, src, src->map);
 }
 
+/* Perform a bitwise OR of minimask 'src' mask data with the equivalent
+ * fields in 'dst', storing the result in 'dst'. */
+static inline void
+flow_wildcards_union_with_minimask(struct flow_wildcards *dst,
+   const struct minimask *src)
+{
+flow_union_with_miniflow_subset(>masks, >masks, src->masks.map);
+}
+
 static inline bool is_ct_valid(const struct flow *flow,
const struct flow_wildcards *mask,
struct flow_wildcards *wc)
diff --git a/ofproto/ofproto-dpif-mirror.c b/ofproto/ofproto-dpif-mirror.c
index 4967ecc9a..7020a5a5f 100644
--- a/ofproto/ofproto-dpif-mirror.c
+++ b/ofproto/ofproto-dpif-mirror.c
@@ -21,6 +21,7 @@
 #include "cmap.h"
 #include "hmapx.h"
 #include "ofproto.h"
+#include "ofproto-dpif-trace.h"
 #include "vlan-bitmap.h"
 #include "openvswitch/vlog.h"
 
@@ -48,6 +49,11 @@ struct mbundle {
 mirror_mask_t mirror_out;   /* Mirrors that output to this mbundle. */
 };
 
+struct filtermask {
+struct miniflow *flow;
+struct minimask *mask;
+};
+
 struct mirror {
 struct mbridge *mbridge;/* Owning ofproto. */
 size_t idx; /* In ofproto's "mirrors" array. */
@@ -57,6 +63,10 @@ struct mirror {
 struct hmapx srcs;  /* Contains "struct mbundle*"s. */
 struct hmapx dsts;  /* Contains "struct mbundle*"s. */
 
+/* Filter criteria. */
+OVSRCU_TYPE(struct filtermask *) filter_mask;
+char *filter_str;
+
 /* This is accessed by handler threads assuming RCU protection (see
  * mirror_get()), but can be manipulated by mirror_set() without any
  * explicit synchronization. */
@@ -83,6 +93,23 @@ static void mbundle_lookup_multiple(const struct mbridge *, 
struct ofbundle **,
 static int mirror_scan(struct

Re: [ovs-dev] [PATCH v6 2/2] netlink-conntrack: Optimize flushing ct zone.

2024-03-06 Thread Mike Pattrick
On Mon, Mar 4, 2024 at 3:22 AM Felix Huettner via dev
 wrote:
>
> Previously the kernel did not provide a netlink interface to flush/list
> only conntrack entries matching a specific zone. With [1] and [2] it is now
> possible to flush and list conntrack entries filtered by zone. Older
> kernels not yet supporting this feature will ignore the filter.
> For the list request that means just returning all entries (which we can
> then filter in userspace as before).
> For the flush request that means deleting all conntrack entries.
>
> The implementation is now identical to the windows one, so we combine
> them.
>
> These significantly improves the performance of flushing conntrack zones
> when the conntrack table is large. Since flushing a conntrack zone is
> normally triggered via an openflow command it blocks the main ovs thread
> and thereby also blocks new flows from being applied. Using this new
> feature we can reduce the flushing time for zones by around 93%.
>
> In combination with OVN the creation of a Logical_Router (which causes
> the flushing of a ct zone) could block other operations, e.g. the
> failover of Logical_Routers (as they cause new flows to be created).
> This is visible from a user perspective as a ovn-controller that is idle
> (as it waits for vswitchd) and vswitchd reporting:
> "blocked 1000 ms waiting for main to quiesce" (potentially with ever
> increasing times).
>
> The following performance tests where run in a qemu vm with 500.000
> conntrack entries distributed evenly over 500 ct zones using `ovstest
> test-netlink-conntrack flush zone=`.
>
>   |  flush zone with 1000 entries  |   flush zone with no entry |
>   +-+--+-+--|
>   |   with the patch| without  |   with the patch| without  |
>   +--+--+--+--+--+--|
>   | v6.8-rc4 |  v6.7.1  | v6.8-rc4 | v6.8-rc4 |  v6.7.1  | v6.8-rc4 |
> +-+--+--+--+--+--+--|
> | Min |  0.260   |  3.946   |  3.497   |  0.228   |  3.462   |  3.212   |
> | Median  |  0.319   |  4.237   |  4.349   |  0.298   |  4.460   |  4.010   |
> | 90%ile  |  0.335   |  4.367   |  4.522   |  0.325   |  4.662   |  4.572   |
> | 99%ile  |  0.348   |  4.495   |  4.773   |  0.340   |  4.931   |  6.003   |
> | Max |  0.362   |  4.543   |  5.054   |  0.348   |  5.390   |  6.396   |
> | Mean|  0.320   |  4.236   |  4.331   |  0.296   |  4.430   |  4.071   |
> | Total   |  80.02   |  1058|  1082|  73.93   |  1107|  1017|
>
> [1]: 
> https://github.com/torvalds/linux/commit/eff3c558bb7e61c41b53e4c8130e514a5a4df9ba
> [2]: 
> https://github.com/torvalds/linux/commit/fa173a1b4e3fd1ab5451cbc57de6fc624c824b0a
>
> Co-Authored-By: Luca Czesla 
> Signed-off-by: Luca Czesla 
> Co-Authored-By: Max Lamprecht 
> Signed-off-by: Max Lamprecht 
> Signed-off-by: Felix Huettner 
> ---

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3] ovsdb: Don't iterate over rows on empty mutation.

2024-03-06 Thread Mike Pattrick
Previously when an empty mutation was used to count the number of rows
in a table, OVSDB would iterate over all rows twice. First to perform an
RBAC check, and then to perform the no-operation.

This change adds a short circuit to mutate operations with no conditions
and an empty mutation set, returning immediately. One notable change in
functionality is not performing the RBAC check in this condition, as no
mutation actually takes place.

Reported-by: Terry Wilson 
Reported-at: https://issues.redhat.com/browse/FDP-359
Signed-off-by: Mike Pattrick 
---
v2: Added additional non-rbac tests, and support for conditional
counting without the rbac check
v3: Changed a struct to a size_t.
---
 ovsdb/execution.c| 23 +-
 ovsdb/mutation.h |  6 +
 tests/ovsdb-execution.at | 51 
 tests/ovsdb-rbac.at  | 23 ++
 4 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/ovsdb/execution.c b/ovsdb/execution.c
index 8c20c3b54..f4cc9e802 100644
--- a/ovsdb/execution.c
+++ b/ovsdb/execution.c
@@ -585,6 +585,16 @@ mutate_row_cb(const struct ovsdb_row *row, void *mr_)
 return *mr->error == NULL;
 }
 
+static bool
+count_row_cb(const struct ovsdb_row *row OVS_UNUSED, void *rc)
+{
+size_t *row_count = rc;
+
+(*row_count)++;
+
+return true;
+}
+
 static struct ovsdb_error *
 ovsdb_execute_mutate(struct ovsdb_execution *x, struct ovsdb_parser *parser,
  struct json *result)
@@ -609,7 +619,18 @@ ovsdb_execute_mutate(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 error = ovsdb_condition_from_json(table->schema, where, x->symtab,
   );
 }
-if (!error) {
+if (!error && ovsdb_mutation_set_empty()) {
+/* Special case with no mutations, just return the row count. */
+if (ovsdb_condition_empty()) {
+json_object_put(result, "count",
+json_integer_create(hmap_count(>rows)));
+} else {
+size_t row_count = 0;
+ovsdb_query(table, , count_row_cb, _count);
+json_object_put(result, "count",
+json_integer_create(row_count));
+}
+} else if (!error) {
 mr.n_matches = 0;
 mr.txn = x->txn;
 mr.mutations = 
diff --git a/ovsdb/mutation.h b/ovsdb/mutation.h
index 7566ef199..05d4a262a 100644
--- a/ovsdb/mutation.h
+++ b/ovsdb/mutation.h
@@ -69,4 +69,10 @@ void ovsdb_mutation_set_destroy(struct ovsdb_mutation_set *);
 struct ovsdb_error *ovsdb_mutation_set_execute(
 struct ovsdb_row *, const struct ovsdb_mutation_set *) 
OVS_WARN_UNUSED_RESULT;
 
+static inline bool ovsdb_mutation_set_empty(
+const struct ovsdb_mutation_set *ms)
+{
+return ms->n_mutations == 0;
+}
+
 #endif /* ovsdb/mutation.h */
diff --git a/tests/ovsdb-execution.at b/tests/ovsdb-execution.at
index fd1c7a239..1ffa2b738 100644
--- a/tests/ovsdb-execution.at
+++ b/tests/ovsdb-execution.at
@@ -1201,4 +1201,55 @@ OVSDB_CHECK_EXECUTION([garbage collection],
 [{"rows":[]}]
 ]])])
 
+OVSDB_CHECK_EXECUTION([insert rows, count with mutation],
+  [ordinal_schema],
+  "ordinals",
+  {"op": "insert",
+   "table": "ordinals",
+   "row": {"number": 0, "name": "zero"},
+   "uuid-name": "first"}]]],
+   [[["ordinals",
+  {"op": "insert",
+   "table": "ordinals",
+   "row": {"number": 1, "name": "one"},
+   "uuid-name": "first"}]]],
+   [[["ordinals",
+  {"op": "mutate",
+   "table": "ordinals",
+   "where": [["name", "==", "zero"]],
+   "mutations": []}]]],
+   [[["ordinals",
+  {"op": "mutate",
+   "table": "ordinals",
+   "where": [["name", "==", "one"]],
+   "mutations": []}]]],
+   [[["ordinals",
+  {"op": "insert",
+   "table": "ordinals",
+   "row": {"number": 2, "name": "one"},
+   "uuid-name": "first"}]]],
+   [[["ordinals",
+  {"op": "mutate",
+   "table": "ordinals",
+   "where": [["name", "==", "one"]],
+   "mutations": []}]]],
+   [[["ordinals",
+  {"op": "delete",
+   "table": "ordinals",
+   "where": [["name", "==", "zero"]]}]]],
+   [[["

Re: [ovs-dev] [PATCH v6 1/2] util: Support checking for kernel versions.

2024-03-05 Thread Mike Pattrick
On Mon, Mar 4, 2024 at 3:22 AM Felix Huettner via dev
 wrote:
>
> Extract checking for a given kernel version to a separate function.
> It will be used also in the next patch.
>
> Signed-off-by: Felix Huettner 
> ---

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] netdev-dpdk: Clean up all marker flags if no offloads requested.

2024-03-12 Thread Mike Pattrick
On Mon, Mar 11, 2024 at 2:31 PM Ilya Maximets  wrote:
>
> Some drivers (primarily, Intel ones) do not expect any marking flags
> being set if no offloads are requested.  If these flags are present,
> driver will fail Tx preparation or behave abnormally.
>
> For example, ixgbe driver will refuse to process the packet with
> only RTE_MBUF_F_TX_TUNNEL_GENEVE and RTE_MBUF_F_TX_OUTER_IPV4 set.
> This pretty much breaks Geneve tunnels on these cards.
>
> An extra check is added to make sure we don't have any unexpected
> Tx offload flags set.
>
> Fixes: 084c8087292c ("userspace: Support VXLAN and GENEVE TSO.")
> Reported-at: https://github.com/openvswitch/ovs-issues/issues/321
> Signed-off-by: Ilya Maximets 

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/3] netdev-dpdk: Clear inner packet marks if no inner offloads requested.

2024-03-14 Thread Mike Pattrick
On Wed, Mar 13, 2024 at 1:29 PM Ilya Maximets  wrote:
>
> In some cases only outer offloads may be requested for a tunneled
> packet.  In this case there is no need to mark the type of an
> inner packet.  Clean these flags up to avoid potential confusion
> of DPDK drivers.
>
> Fixes: 084c8087292c ("userspace: Support VXLAN and GENEVE TSO.")
> Signed-off-by: Ilya Maximets 

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/3] netdev-dpdk: Fix TCP check during Tx offload preparation.

2024-03-14 Thread Mike Pattrick
On Wed, Mar 13, 2024 at 1:29 PM Ilya Maximets  wrote:
>
> RTE_MBUF_F_TX_TCP_CKSUM is not a flag, but a 2-bit field, so checking
> it with a simple binary 'and' is incorrect.  For example, this check
> will succeed for a packet with UDP checksum requested as well.
>
> Fix the check to avoid wrongly initializing tso_segz and potentially
> accessing UDP header via TCP structure pointer.
>
> The IPv4 checksum flag has to be set for any L4 checksum request,
> regardless of the type, so moving this check out of the TCP condition.
>
> Fixes: 8b5fe2dc6080 ("userspace: Add Generic Segmentation Offloading.")
> Signed-off-by: Ilya Maximets 

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 3/3] netdev-dpdk: Fix tunnel type check during Tx offload preparation.

2024-03-14 Thread Mike Pattrick
On Wed, Mar 13, 2024 at 1:29 PM Ilya Maximets  wrote:
>
> Tunnel types are not flags, but 4-bit fields, so checking them with
> a simple binary 'and' is incorrect and may produce false-positive
> matches.
>
> While the current implementation is unlikely to cause any issues today,
> since both RTE_MBUF_F_TX_TUNNEL_VXLAN and RTE_MBUF_F_TX_TUNNEL_GENEVE
> only have 1 bit set, it is risky to have this code and it may lead
> to problems if we add support for other tunnel types in the future.
>
> Use proper field checks instead.  Also adding a warning for unexpected
> tunnel types in case something goes wrong.
>
> Fixes: 084c8087292c ("userspace: Support VXLAN and GENEVE TSO.")
> Signed-off-by: Ilya Maximets 

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3] ovs-monitor-ipsec: LibreSwan autodetect paths.

2024-03-21 Thread Mike Pattrick
In v4.0, LibreSwan changed a default paths that had been hardcoded in
ovs-monitor-ipsec, breaking some uses of this script. This patch adds
support for both old and newer versions by auto detecting the version
of LibreSwan and then choosing the correct path.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1975039
Reported-by: Qijun Ding 
Fixes: d6afbc00d5b3 ("ipsec: Allow custom file locations.")
Signed-off-by: Mike Pattrick 
---
v2: Don't extract variables from ipsec script
v3: Removed use of packaging
---
 ipsec/ovs-monitor-ipsec.in | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in
index 7945162f9..bc7ac5523 100755
--- a/ipsec/ovs-monitor-ipsec.in
+++ b/ipsec/ovs-monitor-ipsec.in
@@ -457,14 +457,30 @@ conn prevent_unencrypted_vxlan
 CERTKEY_PREFIX = "ovs_certkey_"
 
 def __init__(self, libreswan_root_prefix, args):
+# Collect version infromation
+self.IPSEC = libreswan_root_prefix + "/usr/sbin/ipsec"
+proc = subprocess.Popen([self.IPSEC, "--version"],
+stdout=subprocess.PIPE,
+encoding="latin1")
+pout, perr = proc.communicate()
+
+v = re.match("^Libreswan (.*)$", pout)
+try:
+version = int(v.group(1).split(".")[0])
+except:
+version = 0
+
+if version >= 4:
+ipsec_d = args.ipsec_d if args.ipsec_d else "/var/lib/ipsec/nss"
+else:
+ipsec_d = args.ipsec_d if args.ipsec_d else "/etc/ipsec.d"
+
 ipsec_conf = args.ipsec_conf if args.ipsec_conf else "/etc/ipsec.conf"
-ipsec_d = args.ipsec_d if args.ipsec_d else "/etc/ipsec.d"
 ipsec_secrets = (args.ipsec_secrets if args.ipsec_secrets
 else "/etc/ipsec.secrets")
 ipsec_ctl = (args.ipsec_ctl if args.ipsec_ctl
 else "/run/pluto/pluto.ctl")
 
-self.IPSEC = libreswan_root_prefix + "/usr/sbin/ipsec"
 self.IPSEC_CONF = libreswan_root_prefix + ipsec_conf
 self.IPSEC_SECRETS = libreswan_root_prefix + ipsec_secrets
 self.IPSEC_D = "sql:" + libreswan_root_prefix + ipsec_d
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4] tunnel: Allow UDP zero checksum with IPv6 tunnels.

2024-03-21 Thread Mike Pattrick
This patch adopts the proposed RFC 6935 by allowing null UDP checksums
even if the tunnel protocol is IPv6. This is already supported by Linux
through the udp6zerocsumtx tunnel option. It is disabled by default and
IPv6 tunnels are flagged as requiring a checksum, but this patch enables
the user to set csum=false on IPv6 tunnels.

Signed-off-by: Mike Pattrick 
---
v2: Changed documentation, and added a NEWS item
v3: NEWS file merge conflict
v4: Better comments, new test
---
 NEWS  |  4 
 lib/netdev-native-tnl.c   |  2 +-
 lib/netdev-vport.c| 17 +++--
 lib/netdev.h  | 18 +-
 ofproto/tunnel.c  | 11 +--
 tests/tunnel-push-pop-ipv6.at |  9 +
 tests/tunnel-push-pop.at  |  7 +++
 tests/tunnel.at   |  2 +-
 vswitchd/vswitch.xml  | 12 +---
 9 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/NEWS b/NEWS
index c9e4064e6..6c8c4a2dc 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,10 @@ Post-v3.3.0
  * Conntrack now supports 'random' flag for selecting ports in a range
while natting and 'persistent' flag for selection of the IP address
from a range.
+ * IPv6 UDP tunnel encapsulation including Geneve and VXLAN will now
+   honour the csum option.  Configuring the interface with
+   "options:csum=false" now has the same effect as the udp6zerocsumtx
+   option has with Linux kernel UDP tunnels.
 
 
 v3.3.0 - 16 Feb 2024
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index dee9ab344..e8258bc4e 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -424,7 +424,7 @@ udp_build_header(const struct netdev_tunnel_config *tnl_cfg,
 udp = netdev_tnl_ip_build_header(data, params, IPPROTO_UDP, 0);
 udp->udp_dst = tnl_cfg->dst_port;
 
-if (params->is_ipv6 || params->flow->tunnel.flags & FLOW_TNL_F_CSUM) {
+if (params->flow->tunnel.flags & FLOW_TNL_F_CSUM) {
 /* Write a value in now to mark that we should compute the checksum
  * later. 0x is handy because it is transparent to the
  * calculation. */
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 60caa02fb..e51542e32 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -702,7 +702,9 @@ set_tunnel_config(struct netdev *dev_, const struct smap 
*args, char **errp)
 tnl_cfg.dst_port = htons(atoi(node->value));
 } else if (!strcmp(node->key, "csum") && has_csum) {
 if (!strcmp(node->value, "true")) {
-tnl_cfg.csum = true;
+tnl_cfg.csum = NETDEV_TNL_CSUM_ENABLED;
+} else if (!strcmp(node->value, "false")) {
+tnl_cfg.csum = NETDEV_TNL_CSUM_DISABLED;
 }
 } else if (!strcmp(node->key, "seq") && has_seq) {
 if (!strcmp(node->value, "true")) {
@@ -850,6 +852,15 @@ set_tunnel_config(struct netdev *dev_, const struct smap 
*args, char **errp)
 }
 }
 
+/* The default csum state for GRE is special as it does have an optional
+ * checksum but the default configuration isn't correlated with IP version
+ * like UDP tunnels are.  Likewise, tunnels with checksum at all must be in
+ * this state. */
+if (tnl_cfg.csum == NETDEV_TNL_CSUM_DEFAULT &&
+(!has_csum || strstr(type, "gre"))) {
+tnl_cfg.csum = NETDEV_TNL_DEFAULT_NO_CSUM;
+}
+
 enum tunnel_layers layers = tunnel_supported_layers(type, _cfg);
 const char *full_type = (strcmp(type, "vxlan") ? type
  : (tnl_cfg.exts & (1 << OVS_VXLAN_EXT_GPE)
@@ -1026,8 +1037,10 @@ get_tunnel_config(const struct netdev *dev, struct smap 
*args)
 }
 }
 
-if (tnl_cfg->csum) {
+if (tnl_cfg->csum == NETDEV_TNL_CSUM_ENABLED) {
 smap_add(args, "csum", "true");
+} else if (tnl_cfg->csum == NETDEV_TNL_CSUM_DISABLED) {
+smap_add(args, "csum", "false");
 }
 
 if (tnl_cfg->set_seq) {
diff --git a/lib/netdev.h b/lib/netdev.h
index 67a8486bd..5d253157c 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -111,6 +111,22 @@ enum netdev_srv6_flowlabel {
 SRV6_FLOWLABEL_COMPUTE,
 };
 
+enum netdev_tnl_csum {
+/* Default value for UDP tunnels if no configurations is present.  Enforce
+ * checksum calculation in IPv6 tunnels, disable in IPv4 tunnels. */
+NETDEV_TNL_CSUM_DEFAULT = 0,
+
+/* Checksum explicitly to be calculated. */
+NETDEV_TNL_CSUM_ENABLED,
+
+/* Checksum calculation explicitly disabled. */
+NETDEV_TNL_CSUM_DISABLED,
+
+/* A value for when there is no checksum or the default value is no
+ * checksum reguardless of IP version. */
+NETDEV_TNL_DEFAULT_NO_CSUM,
+};
+
 /* Configuration spe

Re: [ovs-dev] [PATCH] ovs-monitor-ipsec: LibreSwan autodetect paths.

2024-03-20 Thread Mike Pattrick
On Tue, Mar 19, 2024 at 5:35 PM Ilya Maximets  wrote:
>
> On 3/13/24 22:54, Mike Pattrick wrote:
> > In v4.0, LibreSwan changed a default paths that had been hardcoded in
> > ovs-monitor-ipsec, breaking some uses of this script. This patch adds
> > support for both old and newer versions by auto detecting the location
> > of these paths from LibreSwan shell script environment variables.
> >
> > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1975039
> > Reported-by: Qijun Ding 
> > Fixes: d6afbc00d5b3 ("ipsec: Allow custom file locations.")
> > Signed-off-by: Mike Pattrick 
> > ---
> >  ipsec/ovs-monitor-ipsec.in | 31 +++
> >  1 file changed, 27 insertions(+), 4 deletions(-)
> >
>
> Hi, Mike.  Thanks for working on this!
>
> Though using the knowledge that /usr/sbin/ipsec is a shell script
> and that it defines particular variables inside seems like a hack.
>
> Maybe we can just check the version instead?  We know that default
> nss path changed in 4.0.

My motivation for this method was because these paths could be changed
easier than reimplementing the ipsec script, by the maintainers or
downstream distributions. But there's nothing stopping us from
addressing any future changes as they happen, I'll resend with just a
fix for 4.0.

-M

>
> Best regards, Ilya Maximets.
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] ovs-monitor-ipsec: LibreSwan autodetect paths.

2024-03-20 Thread Mike Pattrick
In v4.0, LibreSwan changed a default paths that had been hardcoded in
ovs-monitor-ipsec, breaking some uses of this script. This patch adds
support for both old and newer versions by auto detecting the version
of LibreSwan and then choosing the correct path.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1975039
Reported-by: Qijun Ding 
Fixes: d6afbc00d5b3 ("ipsec: Allow custom file locations.")
Signed-off-by: Mike Pattrick 
---
v2: Don't extract variables from ipsec script
---
 ipsec/ovs-monitor-ipsec.in | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in
index 7945162f9..6a71d4f2f 100755
--- a/ipsec/ovs-monitor-ipsec.in
+++ b/ipsec/ovs-monitor-ipsec.in
@@ -21,6 +21,7 @@ import re
 import subprocess
 import sys
 from string import Template
+from packaging.version import parse
 
 import ovs.daemon
 import ovs.db.idl
@@ -457,14 +458,25 @@ conn prevent_unencrypted_vxlan
 CERTKEY_PREFIX = "ovs_certkey_"
 
 def __init__(self, libreswan_root_prefix, args):
+# Collect version infromation
+self.IPSEC = libreswan_root_prefix + "/usr/sbin/ipsec"
+proc = subprocess.Popen([self.IPSEC, "--version"],
+stdout=subprocess.PIPE,
+encoding="latin1")
+pout, perr = proc.communicate()
+
+v = re.match("^Libreswan (.*)$", pout)
+if v and parse(v.group(1)) >= parse("4.0"):
+ipsec_d = args.ipsec_d if args.ipsec_d else "/var/lib/ipsec/nss"
+else:
+ipsec_d = args.ipsec_d if args.ipsec_d else "/etc/ipsec.d"
+
 ipsec_conf = args.ipsec_conf if args.ipsec_conf else "/etc/ipsec.conf"
-ipsec_d = args.ipsec_d if args.ipsec_d else "/etc/ipsec.d"
 ipsec_secrets = (args.ipsec_secrets if args.ipsec_secrets
 else "/etc/ipsec.secrets")
 ipsec_ctl = (args.ipsec_ctl if args.ipsec_ctl
 else "/run/pluto/pluto.ctl")
 
-self.IPSEC = libreswan_root_prefix + "/usr/sbin/ipsec"
 self.IPSEC_CONF = libreswan_root_prefix + ipsec_conf
 self.IPSEC_SECRETS = libreswan_root_prefix + ipsec_secrets
 self.IPSEC_D = "sql:" + libreswan_root_prefix + ipsec_d
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ovs-monitor-ipsec: LibreSwan autodetect paths.

2024-03-20 Thread Mike Pattrick
On Wed, Mar 20, 2024 at 2:05 PM Mike Pattrick  wrote:
>
> In v4.0, LibreSwan changed a default paths that had been hardcoded in
> ovs-monitor-ipsec, breaking some uses of this script. This patch adds
> support for both old and newer versions by auto detecting the version
> of LibreSwan and then choosing the correct path.
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1975039
> Reported-by: Qijun Ding 
> Fixes: d6afbc00d5b3 ("ipsec: Allow custom file locations.")
> Signed-off-by: Mike Pattrick 
> ---
> v2: Don't extract variables from ipsec script
> ---

Failed with 503 Service Unavailable

Recheck-request: github-robot

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] ofproto-dpif-upcall: Don't mirror packets that aren't modified.

2024-03-12 Thread Mike Pattrick
Previously OVS reset the mirror contents when a packet is modified in
such a way that the packets contents changes. However, this change
incorrectly reset that mirror context when only metadata changes as
well.

Now we check for all metadata fields, instead of just tunnel metadata,
before resetting the mirror context.

Fixes: feed7f677505 ("ofproto-dpif-upcall: Mirror packets that are modified.")
Reported-by: Zhangweiwei 
Signed-off-by: Mike Pattrick 
---
 include/openvswitch/meta-flow.h |   1 +
 lib/meta-flow.c | 109 
 ofproto/ofproto-dpif-xlate.c|   2 +-
 tests/ofproto-dpif.at   |   5 +-
 4 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h
index 3b0220aaa..96aad3933 100644
--- a/include/openvswitch/meta-flow.h
+++ b/include/openvswitch/meta-flow.h
@@ -2305,6 +2305,7 @@ void mf_set_flow_value_masked(const struct mf_field *,
   const union mf_value *mask,
   struct flow *);
 bool mf_is_tun_metadata(const struct mf_field *);
+bool mf_is_metadata(const struct mf_field *);
 bool mf_is_frozen_metadata(const struct mf_field *);
 bool mf_is_pipeline_field(const struct mf_field *);
 bool mf_is_set(const struct mf_field *, const struct flow *);
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index aa7cf1fcb..7ecec334e 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -1788,6 +1788,115 @@ mf_is_tun_metadata(const struct mf_field *mf)
mf->id < MFF_TUN_METADATA0 + TUN_METADATA_NUM_OPTS;
 }
 
+bool
+mf_is_metadata(const struct mf_field *mf)
+{
+switch (mf->id) {
+CASE_MFF_TUN_METADATA:
+case MFF_METADATA:
+case MFF_IN_PORT:
+case MFF_IN_PORT_OXM:
+CASE_MFF_REGS:
+CASE_MFF_XREGS:
+CASE_MFF_XXREGS:
+case MFF_PACKET_TYPE:
+case MFF_DP_HASH:
+case MFF_RECIRC_ID:
+case MFF_CONJ_ID:
+case MFF_ACTSET_OUTPUT:
+case MFF_SKB_PRIORITY:
+case MFF_PKT_MARK:
+case MFF_CT_STATE:
+case MFF_CT_ZONE:
+case MFF_CT_MARK:
+case MFF_CT_LABEL:
+case MFF_CT_NW_PROTO:
+case MFF_CT_NW_SRC:
+case MFF_CT_NW_DST:
+case MFF_CT_IPV6_SRC:
+case MFF_CT_IPV6_DST:
+case MFF_CT_TP_SRC:
+case MFF_CT_TP_DST:
+case MFF_N_IDS:
+return true;
+
+case MFF_TUN_ID:
+case MFF_TUN_SRC:
+case MFF_TUN_DST:
+case MFF_TUN_IPV6_SRC:
+case MFF_TUN_IPV6_DST:
+case MFF_TUN_FLAGS:
+case MFF_TUN_GBP_ID:
+case MFF_TUN_GBP_FLAGS:
+case MFF_TUN_ERSPAN_VER:
+case MFF_TUN_ERSPAN_IDX:
+case MFF_TUN_ERSPAN_DIR:
+case MFF_TUN_ERSPAN_HWID:
+case MFF_TUN_GTPU_FLAGS:
+case MFF_TUN_GTPU_MSGTYPE:
+case MFF_TUN_TTL:
+case MFF_TUN_TOS:
+case MFF_ETH_SRC:
+case MFF_ETH_DST:
+case MFF_ETH_TYPE:
+case MFF_VLAN_TCI:
+case MFF_DL_VLAN:
+case MFF_VLAN_VID:
+case MFF_DL_VLAN_PCP:
+case MFF_VLAN_PCP:
+case MFF_MPLS_LABEL:
+case MFF_MPLS_TC:
+case MFF_MPLS_BOS:
+case MFF_MPLS_TTL:
+case MFF_IPV4_SRC:
+case MFF_IPV4_DST:
+case MFF_IPV6_SRC:
+case MFF_IPV6_DST:
+case MFF_IPV6_LABEL:
+case MFF_IP_PROTO:
+case MFF_IP_DSCP:
+case MFF_IP_DSCP_SHIFTED:
+case MFF_IP_ECN:
+case MFF_IP_TTL:
+case MFF_IP_FRAG:
+case MFF_ARP_OP:
+case MFF_ARP_SPA:
+case MFF_ARP_TPA:
+case MFF_ARP_SHA:
+case MFF_ARP_THA:
+case MFF_TCP_SRC:
+case MFF_TCP_DST:
+case MFF_TCP_FLAGS:
+case MFF_UDP_SRC:
+case MFF_UDP_DST:
+case MFF_SCTP_SRC:
+case MFF_SCTP_DST:
+case MFF_ICMPV4_TYPE:
+case MFF_ICMPV4_CODE:
+case MFF_ICMPV6_TYPE:
+case MFF_ICMPV6_CODE:
+case MFF_ND_TARGET:
+case MFF_ND_SLL:
+case MFF_ND_TLL:
+case MFF_ND_RESERVED:
+case MFF_ND_OPTIONS_TYPE:
+case MFF_NSH_FLAGS:
+case MFF_NSH_TTL:
+case MFF_NSH_MDTYPE:
+case MFF_NSH_NP:
+case MFF_NSH_SPI:
+case MFF_NSH_SI:
+case MFF_NSH_C1:
+case MFF_NSH_C2:
+case MFF_NSH_C3:
+case MFF_NSH_C4:
+return false;
+
+default:
+OVS_NOT_REACHED();
+}
+}
+
 bool
 mf_is_frozen_metadata(const struct mf_field *mf)
 {
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 89f183182..faa364ec8 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -7141,7 +7141,7 @@ reset_mirror_ctx(struct xlate_ctx *ctx, const struct flow 
*flow,
 
 set_field = ofpact_get_SET_FIELD(a);
 mf = set_field->field;
-if (mf_are_prereqs_ok(mf, flow, NULL) && !mf_is_tun_metadata(mf)) {
+if (mf_are_prereqs_ok(mf, flow, NULL) && !mf_is_metadata(mf)) {
 ctx->mirrors = 0;
 }
 return;
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index a1393f7f8..245e209c3 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -5443,7 +5443,8 @@ AT_CLEANU

[ovs-dev] [PATCH] ovs-monitor-ipsec: LibreSwan autodetect paths.

2024-03-13 Thread Mike Pattrick
In v4.0, LibreSwan changed a default paths that had been hardcoded in
ovs-monitor-ipsec, breaking some uses of this script. This patch adds
support for both old and newer versions by auto detecting the location
of these paths from LibreSwan shell script environment variables.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1975039
Reported-by: Qijun Ding 
Fixes: d6afbc00d5b3 ("ipsec: Allow custom file locations.")
Signed-off-by: Mike Pattrick 
---
 ipsec/ovs-monitor-ipsec.in | 31 +++
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in
index 7945162f9..6c28f30f4 100755
--- a/ipsec/ovs-monitor-ipsec.in
+++ b/ipsec/ovs-monitor-ipsec.in
@@ -456,15 +456,38 @@ conn prevent_unencrypted_vxlan
 CERT_PREFIX = "ovs_cert_"
 CERTKEY_PREFIX = "ovs_certkey_"
 
+def collect_environment(self):
+"""Extract important paths from ipsec file."""
+env = {
+"IPSEC_CONF": "/etc/ipsec.conf",
+"IPSEC_NSSDIR": "/etc/ipsec.d",
+"IPSEC_RUNDIR": "/run/pluto"
+}
+try:
+with open(self.IPSEC) as fh:
+e_list = re.findall("^([A-Z_]+)=.*:-(.*)}",
+fh.read(),
+re.MULTILINE)
+except:
+return env
+
+for k, v in e_list:
+env[k] = v
+
+return env
+
 def __init__(self, libreswan_root_prefix, args):
-ipsec_conf = args.ipsec_conf if args.ipsec_conf else "/etc/ipsec.conf"
-ipsec_d = args.ipsec_d if args.ipsec_d else "/etc/ipsec.d"
+self.IPSEC = libreswan_root_prefix + "/usr/sbin/ipsec"
+
+env = self.collect_environment()
+
+ipsec_conf = args.ipsec_conf if args.ipsec_conf else env["IPSEC_CONF"]
+ipsec_d = args.ipsec_d if args.ipsec_d else env["IPSEC_NSSDIR"]
 ipsec_secrets = (args.ipsec_secrets if args.ipsec_secrets
 else "/etc/ipsec.secrets")
 ipsec_ctl = (args.ipsec_ctl if args.ipsec_ctl
-else "/run/pluto/pluto.ctl")
+else os.path.join(env["IPSEC_RUNDIR"], "pluto.ctl"))
 
-self.IPSEC = libreswan_root_prefix + "/usr/sbin/ipsec"
 self.IPSEC_CONF = libreswan_root_prefix + ipsec_conf
 self.IPSEC_SECRETS = libreswan_root_prefix + ipsec_secrets
 self.IPSEC_D = "sql:" + libreswan_root_prefix + ipsec_d
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] dpif-netdev: Fix crash due to tunnel offloading on recirculation.

2024-03-22 Thread Mike Pattrick
On Fri, Mar 22, 2024 at 10:41 AM Ilya Maximets  wrote:
>
> Recirculation involves re-parsing the packet from scratch and that
> process is not aware of multiple header levels nor the inner/outer
> offsets.  So, it overwrites offsets with new ones from the outermost
> headers and sets offloading flags that change their meaning when
> the packet is marked for tunnel offloading.
>
> For example:
>
>  1. TCP packet enters OVS.
>  2. TCP packet gets encapsulated into UDP tunnel.
>  3. Recirculation happens.
>  4. Packet is re-parsed after recirculation with miniflow_extract()
> or similar function.
>  5. Packet is marked for UDP checksumming because we parse the
> outermost set of headers.  But since it is tunneled, it means
> inner UDP checksumming.  And that makes no sense, because the
> inner packet is TCP.
>
> This is causing packet drops due to malformed packets or even
> assertions and crashes in the code that is trying to fixup checksums
> for packets using incorrect metadata:
>
>  SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior
>
>  lib/packets.c:2061:15: runtime error:
> member access within null pointer of type 'struct udp_header'
>
>   0 0xbe5221 in packet_udp_complete_csum lib/packets.c:2061:15
>   1 0x7e5662 in dp_packet_ol_send_prepare lib/dp-packet.c:638:9
>   2 0x96ef89 in netdev_send lib/netdev.c:940:9
>   3 0x818e94 in dp_netdev_pmd_flush_output_on_port lib/dpif-netdev.c:5577:9
>   4 0x817606 in dp_netdev_pmd_flush_output_packets lib/dpif-netdev.c:5618:27
>   5 0x81cfa5 in dp_netdev_process_rxq_port lib/dpif-netdev.c:5677:9
>   6 0x7eefe4 in dpif_netdev_run lib/dpif-netdev.c:7001:25
>   7 0x610e87 in type_run ofproto/ofproto-dpif.c:367:9
>   8 0x5b9e80 in ofproto_type_run ofproto/ofproto.c:1879:31
>   9 0x55bbb4 in bridge_run__ vswitchd/bridge.c:3281:9
>  10 0x558b6b in bridge_run vswitchd/bridge.c:3346:5
>  11 0x591dc5 in main vswitchd/ovs-vswitchd.c:130:9
>  12 0x172b89 in __libc_start_call_main (/lib64/libc.so.6+0x27b89)
>  13 0x172c4a in __libc_start_main@GLIBC_2.2.5 (/lib64/libc.so.6+0x27c4a)
>  14 0x47eff4 in _start (vswitchd/ovs-vswitchd+0x47eff4)
>
> Tests added for both IPv4 and IPv6 cases.  Though IPv6 test doesn't
> trigger the issue it's better to have a symmetric test.
>
> Fixes: 084c8087292c ("userspace: Support VXLAN and GENEVE TSO.")
> Reported-at: 
> https://mail.openvswitch.org/pipermail/ovs-discuss/2024-March/053014.html
> Signed-off-by: Ilya Maximets 
> ---

I have tested this, and it does fix the segfault here.

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ofproto-dpif-upcall: Don't mirror packets that aren't modified.

2024-03-25 Thread Mike Pattrick
On Mon, Mar 25, 2024 at 3:48 AM Zhangweiwei  wrote:
>
> Hi,
> I have tried this patch, however, there are still some issues when the 
> packets contents are changed across recirculation. On the follow example, 
> packets are modified in recirc_id(0) after mirror, the mirror context reset. 
> Therefore, there are two ICMP request packets are mirrored on port mitapVm71.
>
> In the following example, ICMP packets ared sent from port(11) to port(14),
> [root@localhost ~]# ovs-appctl dpif/dump-flows vds1-br
> ct_state(-new-est-rel-rpl-inv-trk),recirc_id(0),in_port(11),packet_type(ns=0,id=0),eth(src=52:54:00:67:d5:61,dst=68:05:ca:21:d6:e5),eth_type(0x0800),ipv4(src=11.11.70.1,dst=1.1.70.2,proto=1,ttl=64,frag=no),
>  packets:431, bytes:42238, used:0.574s, 
> actions:10,set(eth(src=68:05:ca:21:d6:e5,dst=52:54:00:9a:bf:ed)),set(ipv4(ttl=63)),ct(zone=6),recirc(0x3e8)
> ct_state(+est-rel-rpl),recirc_id(0x3e8),in_port(11),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no),
>  packets:430, bytes:42140, used:0.574s, actions:10,14
>
> ct_state(-new+est-rel+rpl-inv+trk),recirc_id(0x3e9),in_port(14),packet_type(ns=0,id=0),eth(src=52:54:00:9a:bf:ed,dst=68:05:ca:21:d6:e5),eth_type(0x0800),ipv4(dst=11.11.70.1,proto=1,ttl=64,frag=no),
>  packets:431, bytes:42238, used:0.574s, 
> actions:set(eth(src=68:05:ca:21:d6:e5,dst=52:54:00:67:d5:61)),set(ipv4(ttl=63)),11,10
> ct_state(-trk),recirc_id(0),in_port(14),packet_type(ns=0,id=0),eth(src=52:54:00:9a:bf:ed),eth_type(0x0800),ipv4(src=1.1.70.2,proto=1,frag=no),
>  packets:431, bytes:42238, used:0.574s, actions:ct(zone=6),recirc(0x3e9)
>
> [root@localhost ~]# ovs-appctl dpif/show
> netdev@ovs-netdev: hit:2552 missed:3019
>   vds1-br:
> mitapVm71 14/10: (system)
> tapVm71 5/11: (dpdkvhostuserclient: configured_rx_queues=1, 
> configured_tx_queues=1, mtu=1500, requested_rx_queues=1, 
> requested_tx_queues=1)
> tapVm72 6/14: (dpdkvhostuserclient: configured_rx_queues=1, 
> configured_tx_queues=1, mtu=1500, requested_rx_queues=1, 
> requested_tx_queues=1)
>
> [root@localhost ~]# ovs-tcpdump -i tapVm71
> 14:38:53.702142 IP 11.11.70.1 > 1.1.70.2: ICMP echo request, id 13483, seq 
> 2014, length 64
> 14:38:53.702143 IP 11.11.70.1 > 1.1.70.2: ICMP echo request, id 13483, seq 
> 2014, length 64
> 14:38:53.702185 IP 1.1.70.2 > 11.11.70.1: ICMP echo reply, id 13483, seq 
> 2014, length 64
> 14:38:54.742141 IP 11.11.70.1 > 1.1.70.2: ICMP echo request, id 13483, seq 
> 2015, length 64
> 14:38:54.742143 IP 11.11.70.1 > 1.1.70.2: ICMP echo request, id 13483, seq 
> 2015, length 64
> 14:38:54.742183 IP 1.1.70.2 > 11.11.70.1: ICMP echo reply, id 13483, seq 
> 2015, length 64
> 14:38:55.782142 IP 11.11.70.1 > 1.1.70.2: ICMP echo request, id 13483, seq 
> 2016, length 64
> 14:38:55.782144 IP 11.11.70.1 > 1.1.70.2: ICMP echo request, id 13483, seq 
> 2016, length 64
> 14:38:55.782186 IP 1.1.70.2 > 11.11.70.1: ICMP echo reply, id 13483, seq 
> 2016, length 64


Hello, thanks for the report. Is it possible to run the command
"ovs-tcpdump -i tapVm71 -ennvv" ? I ask because I see your actions
reset the ethernet address. If the ethernet address is different then
this would be the expected behavior, the collection of the packet as
it enters, and then as it exists modified.


Thank you,

Mike


>
> -邮件原件-
> 发件人: Mike Pattrick [mailto:m...@redhat.com]
> 发送时间: 2024年3月13日 1:37
> 收件人: d...@openvswitch.org
> 抄送: Mike Pattrick ; zhangweiwei (RD) 
> 主题: [PATCH] ofproto-dpif-upcall: Don't mirror packets that aren't modified.
>
> Previously OVS reset the mirror contents when a packet is modified in such a 
> way that the packets contents changes. However, this change incorrectly reset 
> that mirror context when only metadata changes as well.
>
> Now we check for all metadata fields, instead of just tunnel metadata, before 
> resetting the mirror context.
>
> Fixes: feed7f677505 ("ofproto-dpif-upcall: Mirror packets that are modified.")
> Reported-by: Zhangweiwei 
> Signed-off-by: Mike Pattrick 
> ---
>  include/openvswitch/meta-flow.h |   1 +
>  lib/meta-flow.c | 109 
>  ofproto/ofproto-dpif-xlate.c|   2 +-
>  tests/ofproto-dpif.at   |   5 +-
>  4 files changed, 114 insertions(+), 3 deletions(-)
>
> diff --git a/include/openvswitch/meta-flow.h 
> b/include/openvswitch/meta-flow.h index 3b0220aaa..96aad3933 100644
> --- a/include/openvswitch/meta-flow.h
> +++ b/include/openvswitch/meta-flow.h
> @@ -2305,6 +2305,7 @@ void mf_set_flow_value_masked(const struct mf_field *,
>const union mf_value *mask,
>struct flow *);  bool mf_is_tun_metadata(const 
> struct mf_field *);
> +bool 

[ovs-dev] [PATCH v5] tunnel: Allow UDP zero checksum with IPv6 tunnels.

2024-03-27 Thread Mike Pattrick
This patch adopts the proposed RFC 6935 by allowing null UDP checksums
even if the tunnel protocol is IPv6. This is already supported by Linux
through the udp6zerocsumtx tunnel option. It is disabled by default and
IPv6 tunnels are flagged as requiring a checksum, but this patch enables
the user to set csum=false on IPv6 tunnels.

Signed-off-by: Mike Pattrick 
---
v2: Changed documentation, and added a NEWS item
v3: NEWS file merge conflict
v4: Better comments, new test
v5: Addressed identified nit's
---
 NEWS  |  4 
 lib/netdev-native-tnl.c   |  2 +-
 lib/netdev-vport.c| 17 +++--
 lib/netdev.h  | 18 +-
 ofproto/tunnel.c  | 10 --
 tests/tunnel-push-pop-ipv6.at |  9 +
 tests/tunnel-push-pop.at  |  7 +++
 tests/tunnel.at   |  2 +-
 vswitchd/vswitch.xml  | 12 +---
 9 files changed, 71 insertions(+), 10 deletions(-)

diff --git a/NEWS b/NEWS
index c9e4064e6..6c8c4a2dc 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,10 @@ Post-v3.3.0
  * Conntrack now supports 'random' flag for selecting ports in a range
while natting and 'persistent' flag for selection of the IP address
from a range.
+ * IPv6 UDP tunnel encapsulation including Geneve and VXLAN will now
+   honour the csum option.  Configuring the interface with
+   "options:csum=false" now has the same effect as the udp6zerocsumtx
+   option has with Linux kernel UDP tunnels.
 
 
 v3.3.0 - 16 Feb 2024
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index dee9ab344..e8258bc4e 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -424,7 +424,7 @@ udp_build_header(const struct netdev_tunnel_config *tnl_cfg,
 udp = netdev_tnl_ip_build_header(data, params, IPPROTO_UDP, 0);
 udp->udp_dst = tnl_cfg->dst_port;
 
-if (params->is_ipv6 || params->flow->tunnel.flags & FLOW_TNL_F_CSUM) {
+if (params->flow->tunnel.flags & FLOW_TNL_F_CSUM) {
 /* Write a value in now to mark that we should compute the checksum
  * later. 0x is handy because it is transparent to the
  * calculation. */
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 60caa02fb..234a4ebe1 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -702,7 +702,9 @@ set_tunnel_config(struct netdev *dev_, const struct smap 
*args, char **errp)
 tnl_cfg.dst_port = htons(atoi(node->value));
 } else if (!strcmp(node->key, "csum") && has_csum) {
 if (!strcmp(node->value, "true")) {
-tnl_cfg.csum = true;
+tnl_cfg.csum = NETDEV_TNL_CSUM_ENABLED;
+} else if (!strcmp(node->value, "false")) {
+tnl_cfg.csum = NETDEV_TNL_CSUM_DISABLED;
 }
 } else if (!strcmp(node->key, "seq") && has_seq) {
 if (!strcmp(node->value, "true")) {
@@ -850,6 +852,15 @@ set_tunnel_config(struct netdev *dev_, const struct smap 
*args, char **errp)
 }
 }
 
+/* The default csum state for GRE is special as it does have an optional
+ * checksum but the default configuration isn't correlated with IP version
+ * like UDP tunnels are.  Likewise, tunnels with no checksum at all must be
+ * in this state. */
+if (tnl_cfg.csum == NETDEV_TNL_CSUM_DEFAULT &&
+(!has_csum || strstr(type, "gre"))) {
+tnl_cfg.csum = NETDEV_TNL_DEFAULT_NO_CSUM;
+}
+
 enum tunnel_layers layers = tunnel_supported_layers(type, _cfg);
 const char *full_type = (strcmp(type, "vxlan") ? type
  : (tnl_cfg.exts & (1 << OVS_VXLAN_EXT_GPE)
@@ -1026,8 +1037,10 @@ get_tunnel_config(const struct netdev *dev, struct smap 
*args)
 }
 }
 
-if (tnl_cfg->csum) {
+if (tnl_cfg->csum == NETDEV_TNL_CSUM_ENABLED) {
 smap_add(args, "csum", "true");
+} else if (tnl_cfg->csum == NETDEV_TNL_CSUM_DISABLED) {
+smap_add(args, "csum", "false");
 }
 
 if (tnl_cfg->set_seq) {
diff --git a/lib/netdev.h b/lib/netdev.h
index 67a8486bd..5d253157c 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -111,6 +111,22 @@ enum netdev_srv6_flowlabel {
 SRV6_FLOWLABEL_COMPUTE,
 };
 
+enum netdev_tnl_csum {
+/* Default value for UDP tunnels if no configurations is present.  Enforce
+ * checksum calculation in IPv6 tunnels, disable in IPv4 tunnels. */
+NETDEV_TNL_CSUM_DEFAULT = 0,
+
+/* Checksum explicitly to be calculated. */
+NETDEV_TNL_CSUM_ENABLED,
+
+/* Checksum calculation explicitly disabled. */
+NETDEV_TNL_CSUM_DISABLED,
+
+/* A value for when there is no checksum or the default value is no
+ * checksum reguardless of IP version. */
+NETDEV_TNL_DEFAULT_NO_C

Re: [ovs-dev] [PATCH] Userspace: Software fallback for UDP encapsulated TCP segmentation.

2024-03-27 Thread Mike Pattrick
On Wed, Mar 27, 2024 at 1:39 PM Simon Horman  wrote:
>
> On Tue, Feb 20, 2024 at 11:08:55PM -0500, Mike Pattrick wrote:
> > When sending packets that are flagged as requiring segmentation to an
> > interface that doens't support this feature, send the packet to the TSO
> > software fallback instead of dropping it.
> >
> > Signed-off-by: Mike Pattrick 
>
> Hi Mike,
>
> Can I confirm that from your PoV this patch is still awaiting review?
> I ask because it's been sitting around for a while now.

I believe this patch now needs to be modified for the recent
recirculation change. I'll update it for that, give it another once
over, and resubmit.

-M


>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ofproto-dpif-upcall: Don't mirror packets that aren't modified.

2024-03-26 Thread Mike Pattrick
On Mon, Mar 25, 2024 at 10:04 PM Zhangweiwei  wrote:
>
> The ethernet addresses of two ICMP request packets are indeed different . One 
> is original packet and the other is modified. It is an expected behavior 
> according to the code.
> Actually, when a packet sent by port A is changed by flow table and then is 
> sent to itself, we expect to capture this packet. However, when this packet 
> is changed and is sent to another port, should we still capture the packet on 
> port A?

Currently in ovs-tcpdump we capture the packet on ingress and then on
egress if it is modified. You could make the mirror without
ovs-tcpdump, and only set the select_dst_port option but not the
select_src_port one. select_src_port is checked during ingress and
select_dst_port is set during egress.

Hope this helps,
M

>
> [root@localhost infiniband]# ovs-tcpdump -i tapVm71 -nnvve
> 11.11.70.1 > 1.1.70.2: ICMP echo request, id 15498, seq 17, length 64
> 09:36:52.822232 68:05:ca:21:d6:e5 > 52:54:00:67:d5:61, ethertype IPv4 
> (0x0800), length 98: (tos 0x0, ttl 63, id 22101, offset 0, flags [none], 
> proto ICMP (1), length 84)
> 1.1.70.2 > 11.11.70.1: ICMP echo reply, id 15498, seq 17, length 64
> 09:36:53.862137 52:54:00:67:d5:61 > 68:05:ca:21:d6:e5, ethertype IPv4 
> (0x0800), length 98: (tos 0x0, ttl 64, id 26518, offset 0, flags [DF], proto 
> ICMP (1), length 84)
> 11.11.70.1 > 1.1.70.2: ICMP echo request, id 15498, seq 18, length 64
> 09:36:53.862139 68:05:ca:21:d6:e5 > 52:54:00:9a:bf:ed, ethertype IPv4 
> (0x0800), length 98: (tos 0x0, ttl 63, id 26518, offset 0, flags [DF], proto 
> ICMP (1), length 84)
> 11.11.70.1 > 1.1.70.2: ICMP echo request, id 15498, seq 18, length 64
> 09:36:53.862230 68:05:ca:21:d6:e5 > 52:54:00:67:d5:61, ethertype IPv4 
> (0x0800), length 98: (tos 0x0, ttl 63, id 22176, offset 0, flags [none], 
> proto ICMP (1), length 84)
> 1.1.70.2 > 11.11.70.1: ICMP echo reply, id 15498, seq 18, length 64
>
> -邮件原件-
> 发件人: Mike Pattrick [mailto:m...@redhat.com]
> 发送时间: 2024年3月25日 22:26
> 收件人: zhangweiwei (RD) 
> 抄送: d...@openvswitch.org
> 主题: Re: [PATCH] ofproto-dpif-upcall: Don't mirror packets that aren't 
> modified.
>
> On Mon, Mar 25, 2024 at 3:48 AM Zhangweiwei  wrote:
> >
> > Hi,
> > I have tried this patch, however, there are still some issues when the 
> > packets contents are changed across recirculation. On the follow example, 
> > packets are modified in recirc_id(0) after mirror, the mirror context 
> > reset. Therefore, there are two ICMP request packets are mirrored on port 
> > mitapVm71.
> >
> > In the following example, ICMP packets ared sent from port(11) to
> > port(14), [root@localhost ~]# ovs-appctl dpif/dump-flows vds1-br
> > ct_state(-new-est-rel-rpl-inv-trk),recirc_id(0),in_port(11),packet_typ
> > e(ns=0,id=0),eth(src=52:54:00:67:d5:61,dst=68:05:ca:21:d6:e5),eth_type
> > (0x0800),ipv4(src=11.11.70.1,dst=1.1.70.2,proto=1,ttl=64,frag=no),
> > packets:431, bytes:42238, used:0.574s,
> > actions:10,set(eth(src=68:05:ca:21:d6:e5,dst=52:54:00:9a:bf:ed)),set(i
> > pv4(ttl=63)),ct(zone=6),recirc(0x3e8)
> > ct_state(+est-rel-rpl),recirc_id(0x3e8),in_port(11),packet_type(ns=0,i
> > d=0),eth_type(0x0800),ipv4(frag=no), packets:430, bytes:42140,
> > used:0.574s, actions:10,14
> >
> > ct_state(-new+est-rel+rpl-inv+trk),recirc_id(0x3e9),in_port(14),packet
> > _type(ns=0,id=0),eth(src=52:54:00:9a:bf:ed,dst=68:05:ca:21:d6:e5),eth_
> > type(0x0800),ipv4(dst=11.11.70.1,proto=1,ttl=64,frag=no), packets:431,
> > bytes:42238, used:0.574s,
> > actions:set(eth(src=68:05:ca:21:d6:e5,dst=52:54:00:67:d5:61)),set(ipv4
> > (ttl=63)),11,10
> > ct_state(-trk),recirc_id(0),in_port(14),packet_type(ns=0,id=0),eth(src
> > =52:54:00:9a:bf:ed),eth_type(0x0800),ipv4(src=1.1.70.2,proto=1,frag=no
> > ), packets:431, bytes:42238, used:0.574s,
> > actions:ct(zone=6),recirc(0x3e9)
> >
> > [root@localhost ~]# ovs-appctl dpif/show
> > netdev@ovs-netdev: hit:2552 missed:3019
> >   vds1-br:
> > mitapVm71 14/10: (system)
> > tapVm71 5/11: (dpdkvhostuserclient: configured_rx_queues=1, 
> > configured_tx_queues=1, mtu=1500, requested_rx_queues=1, 
> > requested_tx_queues=1)
> > tapVm72 6/14: (dpdkvhostuserclient: configured_rx_queues=1,
> > configured_tx_queues=1, mtu=1500, requested_rx_queues=1,
> > requested_tx_queues=1)
> >
> > [root@localhost ~]# ovs-tcpdump -i tapVm71
> > 14:38:53.702142 IP 11.11.70.1 > 1.1.70.2: ICMP echo request, id 13483,
> > seq 2014, length 64
> > 14:38:53.702143 IP 11.11.70.1 > 1.1.70.2: ICMP echo request, id 13483,
> > seq 2014, length 64

[ovs-dev] [PATCH v2] ovsdb: Don't iterate over rows on empty mutation.

2024-02-22 Thread Mike Pattrick
Previously when an empty mutation was used to count the number of rows
in a table, OVSDB would iterate over all rows twice. First to perform an
RBAC check, and then to perform the no-operation.

This change adds a short circuit to mutate operations with no conditions
and an empty mutation set, returning immediately. One notable change in
functionality is not performing the RBAC check in this condition, as no
mutation actually takes place.

Reported-by: Terry Wilson 
Reported-at: https://issues.redhat.com/browse/FDP-359
Signed-off-by: Mike Pattrick 
---
v2: Added additional non-rbac tests, and support for conditional
counting without the rbac check
---
 ovsdb/execution.c| 26 +++-
 ovsdb/mutation.h |  6 +
 tests/ovsdb-execution.at | 51 
 tests/ovsdb-rbac.at  | 23 ++
 4 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/ovsdb/execution.c b/ovsdb/execution.c
index 8c20c3b54..7ed700632 100644
--- a/ovsdb/execution.c
+++ b/ovsdb/execution.c
@@ -585,6 +585,19 @@ mutate_row_cb(const struct ovsdb_row *row, void *mr_)
 return *mr->error == NULL;
 }
 
+struct count_row_cbdata {
+size_t n_matches;
+};
+
+static bool
+count_row_cb(const struct ovsdb_row *row OVS_UNUSED, void *cr_)
+{
+struct count_row_cbdata *cr = cr_;
+
+cr->n_matches++;
+return true;
+}
+
 static struct ovsdb_error *
 ovsdb_execute_mutate(struct ovsdb_execution *x, struct ovsdb_parser *parser,
  struct json *result)
@@ -609,7 +622,18 @@ ovsdb_execute_mutate(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 error = ovsdb_condition_from_json(table->schema, where, x->symtab,
   );
 }
-if (!error) {
+if (!error && ovsdb_mutation_set_empty()) {
+/* Special case with no mutations, just return the row count. */
+if (ovsdb_condition_empty()) {
+json_object_put(result, "count",
+json_integer_create(hmap_count(>rows)));
+} else {
+struct count_row_cbdata cr = {};
+ovsdb_query(table, , count_row_cb, );
+json_object_put(result, "count",
+json_integer_create(cr.n_matches));
+}
+} else if (!error) {
 mr.n_matches = 0;
 mr.txn = x->txn;
 mr.mutations = 
diff --git a/ovsdb/mutation.h b/ovsdb/mutation.h
index 7566ef199..05d4a262a 100644
--- a/ovsdb/mutation.h
+++ b/ovsdb/mutation.h
@@ -69,4 +69,10 @@ void ovsdb_mutation_set_destroy(struct ovsdb_mutation_set *);
 struct ovsdb_error *ovsdb_mutation_set_execute(
 struct ovsdb_row *, const struct ovsdb_mutation_set *) 
OVS_WARN_UNUSED_RESULT;
 
+static inline bool ovsdb_mutation_set_empty(
+const struct ovsdb_mutation_set *ms)
+{
+return ms->n_mutations == 0;
+}
+
 #endif /* ovsdb/mutation.h */
diff --git a/tests/ovsdb-execution.at b/tests/ovsdb-execution.at
index fd1c7a239..1ffa2b738 100644
--- a/tests/ovsdb-execution.at
+++ b/tests/ovsdb-execution.at
@@ -1201,4 +1201,55 @@ OVSDB_CHECK_EXECUTION([garbage collection],
 [{"rows":[]}]
 ]])])
 
+OVSDB_CHECK_EXECUTION([insert rows, count with mutation],
+  [ordinal_schema],
+  "ordinals",
+  {"op": "insert",
+   "table": "ordinals",
+   "row": {"number": 0, "name": "zero"},
+   "uuid-name": "first"}]]],
+   [[["ordinals",
+  {"op": "insert",
+   "table": "ordinals",
+   "row": {"number": 1, "name": "one"},
+   "uuid-name": "first"}]]],
+   [[["ordinals",
+  {"op": "mutate",
+   "table": "ordinals",
+   "where": [["name", "==", "zero"]],
+   "mutations": []}]]],
+   [[["ordinals",
+  {"op": "mutate",
+   "table": "ordinals",
+   "where": [["name", "==", "one"]],
+   "mutations": []}]]],
+   [[["ordinals",
+  {"op": "insert",
+   "table": "ordinals",
+   "row": {"number": 2, "name": "one"},
+   "uuid-name": "first"}]]],
+   [[["ordinals",
+  {"op": "mutate",
+   "table": "ordinals",
+   "where": [["name", "==", "one"]],
+   "mutations": []}]]],
+   [[["ordinals",
+  {"op": "delete",
+   "table": "ordinals",
+   "where": [["name", &

[ovs-dev] [PATCH v7 1/2] ofproto-dpif-mirror: Reduce number of function parameters.

2024-02-26 Thread Mike Pattrick
Previously the mirror_set() and mirror_get() functions took a large
number of parameters, which was inefficient and difficult to read and
extend. This patch moves most of the parameters into a struct.

Signed-off-by: Mike Pattrick 
Acked-by: Simon Horman 
Acked-by: Eelco Chaudron 
---
 ofproto/ofproto-dpif-mirror.c | 61 ++-
 ofproto/ofproto-dpif-mirror.h | 42 +++-
 ofproto/ofproto-dpif-xlate.c  | 29 -
 ofproto/ofproto-dpif.c| 23 ++---
 4 files changed, 91 insertions(+), 64 deletions(-)

diff --git a/ofproto/ofproto-dpif-mirror.c b/ofproto/ofproto-dpif-mirror.c
index 343b75f0e..a84c843b3 100644
--- a/ofproto/ofproto-dpif-mirror.c
+++ b/ofproto/ofproto-dpif-mirror.c
@@ -207,19 +207,23 @@ mirror_bundle_dst(struct mbridge *mbridge, struct 
ofbundle *ofbundle)
 }
 
 int
-mirror_set(struct mbridge *mbridge, void *aux, const char *name,
-   struct ofbundle **srcs, size_t n_srcs,
-   struct ofbundle **dsts, size_t n_dsts,
-   unsigned long *src_vlans, struct ofbundle *out_bundle,
-   uint16_t snaplen,
-   uint16_t out_vlan)
+mirror_set(struct mbridge *mbridge, void *aux,
+   const struct ofproto_mirror_settings *ms,
+   const struct mirror_bundles *mb)
+
 {
 struct mbundle *mbundle, *out;
 mirror_mask_t mirror_bit;
 struct mirror *mirror;
 struct hmapx srcs_map;  /* Contains "struct ofbundle *"s. */
 struct hmapx dsts_map;  /* Contains "struct ofbundle *"s. */
+uint16_t out_vlan;
+
+if (!ms || !mbridge) {
+return EINVAL;
+}
 
+out_vlan = ms->out_vlan;
 mirror = mirror_lookup(mbridge, aux);
 if (!mirror) {
 int idx;
@@ -227,7 +231,7 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 idx = mirror_scan(mbridge);
 if (idx < 0) {
 VLOG_WARN("maximum of %d port mirrors reached, cannot create %s",
-  MAX_MIRRORS, name);
+  MAX_MIRRORS, ms->name);
 return EFBIG;
 }
 
@@ -242,8 +246,8 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 unsigned long *vlans = ovsrcu_get(unsigned long *, >vlans);
 
 /* Get the new configuration. */
-if (out_bundle) {
-out = mbundle_lookup(mbridge, out_bundle);
+if (mb->out_bundle) {
+out = mbundle_lookup(mbridge, mb->out_bundle);
 if (!out) {
 mirror_destroy(mbridge, mirror->aux);
 return EINVAL;
@@ -252,16 +256,16 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 } else {
 out = NULL;
 }
-mbundle_lookup_multiple(mbridge, srcs, n_srcs, _map);
-mbundle_lookup_multiple(mbridge, dsts, n_dsts, _map);
+mbundle_lookup_multiple(mbridge, mb->srcs, mb->n_srcs, _map);
+mbundle_lookup_multiple(mbridge, mb->dsts, mb->n_dsts, _map);
 
 /* If the configuration has not changed, do nothing. */
 if (hmapx_equals(_map, >srcs)
 && hmapx_equals(_map, >dsts)
-&& vlan_bitmap_equal(vlans, src_vlans)
+&& vlan_bitmap_equal(vlans, ms->src_vlans)
 && mirror->out == out
 && mirror->out_vlan == out_vlan
-&& mirror->snaplen == snaplen)
+&& mirror->snaplen == ms->snaplen)
 {
 hmapx_destroy(_map);
 hmapx_destroy(_map);
@@ -275,15 +279,15 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 hmapx_swap(_map, >dsts);
 hmapx_destroy(_map);
 
-if (vlans || src_vlans) {
+if (vlans || ms->src_vlans) {
 ovsrcu_postpone(free, vlans);
-vlans = vlan_bitmap_clone(src_vlans);
+vlans = vlan_bitmap_clone(ms->src_vlans);
 ovsrcu_set(>vlans, vlans);
 }
 
 mirror->out = out;
 mirror->out_vlan = out_vlan;
-mirror->snaplen = snaplen;
+mirror->snaplen = ms->snaplen;
 
 /* Update mbundles. */
 mirror_bit = MIRROR_MASK_C(1) << mirror->idx;
@@ -406,23 +410,22 @@ mirror_update_stats(struct mbridge *mbridge, 
mirror_mask_t mirrors,
 /* Retrieves the mirror numbered 'index' in 'mbridge'.  Returns true if such a
  * mirror exists, false otherwise.
  *
- * If successful, '*vlans' receives the mirror's VLAN membership information,
+ * If successful 'mc->vlans' receives the mirror's VLAN membership information,
  * either a null pointer if the mirror includes all VLANs or a 4096-bit bitmap
  * in which a 1-bit indicates that the mirror includes a particular VLAN,
- * '*dup_mirrors' receives a bitmap of mirrors whose output duplicates mirror
- * 'index', '*out' receives the output ofbundle (if any), and '*out_vlan'
- * receives the output VLAN (if any).
+ * 'mc->dup_mirrors' receives a bitmap of mirrors whose output duplicates
+ * mirror 'index

[ovs-dev] [PATCH v7 2/2] ofproto-dpif-mirror: Add support for pre-selection filter.

2024-02-26 Thread Mike Pattrick
Currently a bridge mirror will collect all packets and tools like
ovs-tcpdump can apply additional filters after they have already been
duplicated by vswitchd. This can result in inefficient collection.

This patch adds support to apply pre-selection to bridge mirrors, which
can limit which packets are mirrored based on flow metadata. This
significantly improves overall vswitchd performance during mirroring if
only a subset of traffic is required.

Signed-off-by: Mike Pattrick 
---
v7:
 - Make sure filter mask is added to masks of non-matching flows.
 - Added additional tests.
---
 Documentation/ref/ovs-tcpdump.8.rst |   8 +-
 NEWS|   3 +
 lib/flow.c  |  21 +++-
 lib/flow.h  |  12 +++
 ofproto/ofproto-dpif-mirror.c   |  78 ++-
 ofproto/ofproto-dpif-mirror.h   |  12 ++-
 ofproto/ofproto-dpif-xlate.c|  26 -
 ofproto/ofproto-dpif.c  |   9 +-
 ofproto/ofproto-dpif.h  |   6 ++
 ofproto/ofproto.c   |   4 +-
 ofproto/ofproto.h   |   3 +
 tests/ofproto-dpif.at   | 142 
 utilities/ovs-tcpdump.in|  13 ++-
 vswitchd/bridge.c   |  13 ++-
 vswitchd/vswitch.ovsschema  |   5 +-
 vswitchd/vswitch.xml|  13 +++
 16 files changed, 343 insertions(+), 25 deletions(-)

diff --git a/Documentation/ref/ovs-tcpdump.8.rst 
b/Documentation/ref/ovs-tcpdump.8.rst
index b9f8cdf6f..e21e61211 100644
--- a/Documentation/ref/ovs-tcpdump.8.rst
+++ b/Documentation/ref/ovs-tcpdump.8.rst
@@ -61,8 +61,14 @@ Options
 
   If specified, mirror all ports (optional).
 
+* ``--filter ``
+
+  If specified, only mirror flows that match the provided OpenFlow filter.
+  The available fields are documented in ``ovs-fields(7)``.
+
 See Also
 
 
 ``ovs-appctl(8)``, ``ovs-vswitchd(8)``, ``ovs-pcap(1)``,
-``ovs-tcpundump(1)``, ``tcpdump(8)``, ``wireshark(8)``.
+``ovs-fields(7)``, ``ovs-tcpundump(1)``, ``tcpdump(8)``,
+``wireshark(8)``.
diff --git a/NEWS b/NEWS
index c9e4064e6..35f7eb0c7 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,9 @@ Post-v3.3.0
  * Conntrack now supports 'random' flag for selecting ports in a range
while natting and 'persistent' flag for selection of the IP address
from a range.
+   - OVSDB:
+ * Added a new filter column in the Mirror table which can be used to
+   apply filters to mirror ports.
 
 
 v3.3.0 - 16 Feb 2024
diff --git a/lib/flow.c b/lib/flow.c
index 8e3402388..a088bdc86 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -3569,7 +3569,7 @@ miniflow_equal_in_minimask(const struct miniflow *a, 
const struct miniflow *b,
 return true;
 }
 
-/* Returns true if 'a' and 'b' are equal at the places where there are 1-bits
+/* Returns true if 'a' and 'b' are equal at the places where there are 0-bits
  * in 'mask', false if they differ. */
 bool
 miniflow_equal_flow_in_minimask(const struct miniflow *a, const struct flow *b,
@@ -3587,6 +3587,25 @@ miniflow_equal_flow_in_minimask(const struct miniflow 
*a, const struct flow *b,
 return true;
 }
 
+/* Returns false if 'a' and 'b' differ in places where there are 1-bits in
+ * 'wc', true otherwise. */
+bool
+miniflow_equal_flow_in_flow_wc(const struct miniflow *a, const struct flow *b,
+   const struct flow_wildcards *wc)
+{
+const struct flow *wc_masks = >masks;
+size_t idx;
+
+FLOWMAP_FOR_EACH_INDEX (idx, a->map) {
+if ((miniflow_get(a, idx) ^ flow_u64_value(b, idx)) &
+flow_u64_value(wc_masks, idx)) {
+return false;
+}
+}
+
+return true;
+}
+
 
 void
 minimask_init(struct minimask *mask, const struct flow_wildcards *wc)
diff --git a/lib/flow.h b/lib/flow.h
index 75a9be3c1..a644be39d 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -748,6 +748,9 @@ bool miniflow_equal_in_minimask(const struct miniflow *a,
 bool miniflow_equal_flow_in_minimask(const struct miniflow *a,
  const struct flow *b,
  const struct minimask *);
+bool miniflow_equal_flow_in_flow_wc(const struct miniflow *a,
+const struct flow *b,
+const struct flow_wildcards *);
 uint32_t miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis);
 
 
@@ -939,6 +942,15 @@ flow_union_with_miniflow(struct flow *dst, const struct 
miniflow *src)
 flow_union_with_miniflow_subset(dst, src, src->map);
 }
 
+/* Perform a bitwise OR of minimask 'src' mask data with the equivalent
+ * fields in 'dst', storing the result in 'dst'. */
+static inline void
+flow_wildcards_union_with_minimask(struct flow_wildcards *dst,
+   const struct minimask *src)
+{
+flow_union_with_miniflow_subset(>masks, >masks, src->masks.map);
+}
+
 static inline bool is_ct_valid(c

Re: [ovs-dev] [PATCH 2/5] Documentation: Correct spelling errors.

2024-02-27 Thread Mike Pattrick
On Tue, Feb 27, 2024 at 10:37 AM Simon Horman  wrote:
>
> Correct spelling errors in .rst files flagged by codespell.
>
> Signed-off-by: Simon Horman 
> ---

These look correct to me.

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 5/5] Documentation: Update links to upstream Kernel documentation.

2024-02-27 Thread Mike Pattrick
On Tue, Feb 27, 2024 at 10:37 AM Simon Horman  wrote:
>
> This updates links to several upstream Kernel documents.
>
> 1. Lore is now the canonical archive for the netdev mailing list
>
> 2. net-next is now maintained by the netdev team,
>of which David Miller is currently a member,
>rather than only by David.
>
>Also, use HTTPS rather than HTTP.
>
> 3. The Netdev FAQ has evolved into the Netdev Maintainer Handbook.
>
> 4. The Kernel security document link was dead,
>provide the current canonical location for this document instead.
>
> 1., 2. & 3. Found by inspection
> 4. Flagged by check-docs
>
> Signed-off-by: Simon Horman 

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 3/5] Documentation: Anuket project updates.

2024-02-27 Thread Mike Pattrick
On Tue, Feb 27, 2024 at 10:36 AM Simon Horman  wrote:
>
> The Anuket was formed by a merger of OPNFV and CNTT [1].
>
> Also, VswitchPerf, aka vsperf, formerly an OPNFV project,
> has been renamed ViNePerf [2].
>
> Update links and documentation accordingly.
>
> The old links were broken, this was flagged by make check-docs
>
> [1] 
> https://anuket.io/news/2021/01/27/lf-networking-launches-anuket-an-open-source-project-to-accelerate-infrastructure-compliance-interoperability-and-5g-deployments/
> [2] 
> https://docs.opnfv.org/projects/vineperf/en/latest/release/release-notes/release-notes.html
>
> Signed-off-by: Simon Horman 
> ---

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 4/5] Documentatoin: Update Pacemaker link.

2024-02-27 Thread Mike Pattrick
On Tue, Feb 27, 2024 at 10:36 AM Simon Horman  wrote:
>
> Update link to OCF Resource Agents documentation as the existing link
> is broken. Also, use HTTPS.
>
> Broken link flagged by make check-docs
>
> Signed-off-by: Simon Horman 

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v5 1/2] util: Support checking for kernel versions.

2024-02-27 Thread Mike Pattrick
On Mon, Feb 26, 2024 at 4:22 AM Felix Huettner via dev
 wrote:
>
> Extract checking for a given kernel version to a separate function.
> It will be used also in the next patch.
>
> Signed-off-by: Felix Huettner 
> ---
> v4->v5:
> - fix wrong ifdef that broke on macos
> - fix ovs_kernel_is_version_or_newer working in reverse than desired
> - ovs_kernel_is_version_or_newer now always returns false if uname
>   errors (Thanks Eelco)
> v4:
> - extract function to check kernel version
>  lib/netdev-linux.c | 14 +++---
>  lib/util.c | 27 +++
>  lib/util.h |  4 
>  3 files changed, 34 insertions(+), 11 deletions(-)
>
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index bf91ef462..51bd71ae3 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -6427,18 +6427,10 @@ getqdisc_is_safe(void)
>  static bool safe = false;
>
>  if (ovsthread_once_start()) {
> -struct utsname utsname;
> -int major, minor;
> -
> -if (uname() == -1) {
> -VLOG_WARN("uname failed (%s)", ovs_strerror(errno));
> -} else if (!ovs_scan(utsname.release, "%d.%d", , )) {
> -VLOG_WARN("uname reported bad OS release (%s)", utsname.release);
> -} else if (major < 2 || (major == 2 && minor < 35)) {
> -VLOG_INFO("disabling unsafe RTM_GETQDISC in Linux kernel %s",
> -  utsname.release);
> -} else {
> +if (ovs_kernel_is_version_or_newer(2, 35)) {
>  safe = true;
> +} else {
> +VLOG_INFO("disabling unsafe RTM_GETQDISC in Linux kernel");
>  }
>  ovsthread_once_done();
>  }
> diff --git a/lib/util.c b/lib/util.c
> index 3fb3a4b40..f5b2da095 100644
> --- a/lib/util.c
> +++ b/lib/util.c
> @@ -27,6 +27,7 @@
>  #include 
>  #ifdef __linux__
>  #include 
> +#include 

This import can now be removed from netdev-linux (I believe).

>  #endif
>  #include 
>  #include 
> @@ -2500,3 +2501,29 @@ OVS_CONSTRUCTOR(winsock_start) {
> }
>  }
>  #endif
> +
> +#ifdef __linux__
> +bool
> +ovs_kernel_is_version_or_newer(int target_major, int target_minor)
> +{
> +static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
> +static int current_major, current_minor = -1;
> +
> +if (ovsthread_once_start()) {
> +struct utsname utsname;
> +
> +if (uname() == -1) {
> +VLOG_WARN("uname failed (%s)", ovs_strerror(errno));
> +} else if (!ovs_scan(utsname.release, "%d.%d",
> +_major, _minor)) {
> +VLOG_WARN("uname reported bad OS release (%s)", utsname.release);
> +}
> +ovsthread_once_done();
> +}
> +if (current_major == -1 || current_minor == -1) {
> +return false;
> +}
> +return current_major > target_major || (
> +current_major == target_major && current_minor > target_minor);

Shouldn't this be "current_minor >= target_minor" ?

-M

> +}
> +#endif
> diff --git a/lib/util.h b/lib/util.h
> index f2d45bcac..55718fd87 100644
> --- a/lib/util.h
> +++ b/lib/util.h
> @@ -611,4 +611,8 @@ int ftruncate(int fd, off_t length);
>  }
>  #endif
>
> +#ifdef __linux__
> +bool ovs_kernel_is_version_or_newer(int target_major, int target_minor);
> +#endif
> +
>  #endif /* util.h */
>
> base-commit: 166ee41d282c506d100bc2185d60af277121b55b
> --
> 2.43.2
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/5] Documentation: Extend copyright to 2024.

2024-02-27 Thread Mike Pattrick
On Tue, Feb 27, 2024 at 10:36 AM Simon Horman  wrote:
>
> IANAL, but I think we can extend the copyright attached
> to documentation to cover the current year: we are still
> actively working on the documentation.
>
> Signed-off-by: Simon Horman 

Acked-by: Mike Pattrick 

I wonder if it's valid to set the end date to datetime.now().year

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ovsdb: Don't iterate over rows on empty mutation.

2024-02-28 Thread Mike Pattrick
On Wed, Feb 28, 2024 at 8:41 AM Ilya Maximets  wrote:
>
> On 2/22/24 17:37, Mike Pattrick wrote:
> > Previously when an empty mutation was used to count the number of rows
> > in a table, OVSDB would iterate over all rows twice. First to perform an
> > RBAC check, and then to perform the no-operation.
> >
> > This change adds a short circuit to mutate operations with no conditions
> > and an empty mutation set, returning immediately. One notable change in
> > functionality is not performing the RBAC check in this condition, as no
> > mutation actually takes place.
> >
> > Reported-by: Terry Wilson 
> > Reported-at: https://issues.redhat.com/browse/FDP-359
> > Signed-off-by: Mike Pattrick 
> > ---
> > v2: Added additional non-rbac tests, and support for conditional
> > counting without the rbac check
> > ---
> >  ovsdb/execution.c| 26 +++-
> >  ovsdb/mutation.h |  6 +
> >  tests/ovsdb-execution.at | 51 
> >  tests/ovsdb-rbac.at  | 23 ++
> >  4 files changed, 105 insertions(+), 1 deletion(-)
>
> Hi, Mike.  Thanks for v2!  I didn't test, but it looks good in general.
> See one comment inline.
>
> Best regards, Ilya Maximets.
>
> >
> > diff --git a/ovsdb/execution.c b/ovsdb/execution.c
> > index 8c20c3b54..7ed700632 100644
> > --- a/ovsdb/execution.c
> > +++ b/ovsdb/execution.c
> > @@ -585,6 +585,19 @@ mutate_row_cb(const struct ovsdb_row *row, void *mr_)
> >  return *mr->error == NULL;
> >  }
> >
> > +struct count_row_cbdata {
> > +size_t n_matches;
> > +};
>
> Do we actually need this structure?  It only has one element.
> We should be able to just pass a counter around directly.

It seemed more thematic to me at the time, but I can change this.

-M

>
> > +
> > +static bool
> > +count_row_cb(const struct ovsdb_row *row OVS_UNUSED, void *cr_)
> > +{
> > +struct count_row_cbdata *cr = cr_;
> > +
> > +cr->n_matches++;
> > +return true;
> > +}
> > +
> >  static struct ovsdb_error *
> >  ovsdb_execute_mutate(struct ovsdb_execution *x, struct ovsdb_parser 
> > *parser,
> >   struct json *result)
> > @@ -609,7 +622,18 @@ ovsdb_execute_mutate(struct ovsdb_execution *x, struct 
> > ovsdb_parser *parser,
> >  error = ovsdb_condition_from_json(table->schema, where, x->symtab,
> >);
> >  }
> > -if (!error) {
> > +if (!error && ovsdb_mutation_set_empty()) {
> > +/* Special case with no mutations, just return the row count. */
> > +if (ovsdb_condition_empty()) {
> > +json_object_put(result, "count",
> > +json_integer_create(hmap_count(>rows)));
> > +} else {
> > +struct count_row_cbdata cr = {};
> > +ovsdb_query(table, , count_row_cb, );
> > +json_object_put(result, "count",
> > +json_integer_create(cr.n_matches));
> > +}
> > +} else if (!error) {
> >  mr.n_matches = 0;
> >  mr.txn = x->txn;
> >  mr.mutations = 
> > diff --git a/ovsdb/mutation.h b/ovsdb/mutation.h
> > index 7566ef199..05d4a262a 100644
> > --- a/ovsdb/mutation.h
> > +++ b/ovsdb/mutation.h
> > @@ -69,4 +69,10 @@ void ovsdb_mutation_set_destroy(struct 
> > ovsdb_mutation_set *);
> >  struct ovsdb_error *ovsdb_mutation_set_execute(
> >  struct ovsdb_row *, const struct ovsdb_mutation_set *) 
> > OVS_WARN_UNUSED_RESULT;
> >
> > +static inline bool ovsdb_mutation_set_empty(
> > +const struct ovsdb_mutation_set *ms)
> > +{
> > +return ms->n_mutations == 0;
> > +}
> > +
> >  #endif /* ovsdb/mutation.h */
> > diff --git a/tests/ovsdb-execution.at b/tests/ovsdb-execution.at
> > index fd1c7a239..1ffa2b738 100644
> > --- a/tests/ovsdb-execution.at
> > +++ b/tests/ovsdb-execution.at
> > @@ -1201,4 +1201,55 @@ OVSDB_CHECK_EXECUTION([garbage collection],
> >  [{"rows":[]}]
> >  ]])])
> >
> > +OVSDB_CHECK_EXECUTION([insert rows, count with mutation],
> > +  [ordinal_schema],
> > +  "ordinals",
> > +  {"op": "insert",
> > +   "table": "ordinals",
> > +   "row": {"number": 0, "name": "zero"},
> > +   "uuid-n

Re: [ovs-dev] [PATCH v2] conntrack: Fix flush not flushing all elements.

2024-02-27 Thread Mike Pattrick
On Mon, Feb 26, 2024 at 5:50 AM Xavier Simonart  wrote:
>
> On netdev datapath, when a ct element was cleaned, the cmap
> could be shrinked, potentially causing some elements to be skipped
> in the flush iteration.
>
> Fixes: 967bb5c5cd90 ("conntrack: Add rcu support.")
> Signed-off-by: Xavier Simonart 

Thank you for the patch, I was able to test this out, verify the issue
is as you described, and that your patch fixes the problem.

> ---
> v2: - Updated commit message.
> - Use compose-packet instead of hex packet content.
> - Use dnl for comments.
> - Remove unnecessary errors in OVS_TRAFFIC_VSWITCHD_STOP.
> - Rebased on origin/master.
> ---
>  lib/conntrack.c | 14 
>  lib/conntrack.h |  1 +
>  tests/system-traffic.at | 47 +
>  3 files changed, 52 insertions(+), 10 deletions(-)
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 8a7056bac..5786424f6 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -2651,25 +2651,19 @@ conntrack_dump_start(struct conntrack *ct, struct 
> conntrack_dump *dump,
>
>  dump->ct = ct;
>  *ptot_bkts = 1; /* Need to clean up the callers. */
> +dump->cursor = cmap_cursor_start(>conns);
>  return 0;
>  }
>
>  int
>  conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry)
>  {
> -struct conntrack *ct = dump->ct;
>  long long now = time_msec();
>
> -for (;;) {
> -struct cmap_node *cm_node = cmap_next_position(>conns,
> -   >cm_pos);
> -if (!cm_node) {
> -break;
> -}
> -struct conn_key_node *keyn;
> -struct conn *conn;
> +struct conn_key_node *keyn;
> +struct conn *conn;
>
> -INIT_CONTAINER(keyn, cm_node, cm_node);
> +CMAP_CURSOR_FOR_EACH_CONTINUE (keyn, cm_node, >cursor) {
>  if (keyn->dir != CT_DIR_FWD) {
>  continue;
>  }
> diff --git a/lib/conntrack.h b/lib/conntrack.h
> index ee7da099e..aa12a1847 100644
> --- a/lib/conntrack.h
> +++ b/lib/conntrack.h
> @@ -109,6 +109,7 @@ struct conntrack_dump {
>  union {
>  struct cmap_position cm_pos;

cm_pos is now dead code.

>  struct hmap_position hmap_pos;
> +struct cmap_cursor cursor;
>  };
>  bool filter_zone;
>  uint16_t zone;
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index 98e494abf..34f93b2e5 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -8389,6 +8389,53 @@ AT_CHECK([ovs-pcap client.pcap | grep 
> 20102000], [0], [dnl
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>
> +AT_SETUP([conntrack - Flush many conntrack entries by port])
> +CHECK_CONNTRACK()
> +OVS_TRAFFIC_VSWITCHD_START()
> +
> +ADD_NAMESPACES(at_ns0, at_ns1)
> +
> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
> +
> +AT_DATA([flows.txt], [dnl
> +priority=100,in_port=1,udp,action=ct(zone=1,commit),2
> +])
> +
> +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
> +
> +dnl 20 packets from port 1 and 1 packet from port 2.
> +flow_l3="\
> +eth_src=50:54:00:00:00:09,eth_dst=50:54:00:00:00:0a,dl_type=0x0800,\
> +nw_src=10.1.1.1,nw_dst=10.1.1.2,nw_proto=17,nw_ttl=64,nw_frag=no"
> +
> +for i in $(seq 1 20); do
> +frame=$(ovs-ofctl compose-packet --bare "$flow_l3, udp_src=1,udp_dst=$i")
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 
> packet=$frame actions=resubmit(,0)"])
> +done
> +frame=$(ovs-ofctl compose-packet --bare "$flow_l3, udp_src=2,udp_dst=1")
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=$frame 
> actions=resubmit(,0)"])
> +
> +: > conntrack
> +
> +for i in $(seq 1 20); do
> +echo 
> "udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=${i}),reply=(src=10.1.1.2,dst=10.1.1.1,sport=${i},dport=1),zone=1"
>  >> conntrack
> +done
> +echo 
> "udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=2,dport=1),reply=(src=10.1.1.2,dst=10.1.1.1,sport=1,dport=2),zone=1"
>  >> conntrack
> +
> +sort conntrack > expout
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep -F "src=10.1.1.1," | sort 
> ], [0], [expout])
> +
> +dnl Check that flushing conntrack by port 1 flush all ct for port 1 but 
> keeps ct for port 2.
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src=1'])
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep -F "src=10.1.1.1," | sort 
> ], [0], [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=2,dport=1),reply=(src=10.1.1.2,dst=10.1.1.1,sport=1,dport=2),zone=1
> +])
> +
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> +
>  AT_BANNER([IGMP])
>
>  AT_SETUP([IGMP - flood under normal action])
> --
> 2.41.0
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>

___
dev mailing list
d...@openvswitch.org

[ovs-dev] [PATCH v2] dp-packet: Don't offload inner csum if outer isn't supported.

2024-02-26 Thread Mike Pattrick
Some network cards support inner checksum offloading but not outer
checksum offloading. Currently OVS will resolve that outer checksum but
allows the network card to resolve the inner checksum, invalidating the
outer checksum in the process.

Now if we can't offload outer checksums, we don't offload inner either.

Reported-at: https://issues.redhat.com/browse/FDP-363
Fixes: 084c8087292c ("userspace: Support VXLAN and GENEVE TSO.")
Signed-off-by: Mike Pattrick 
---
nb: I also tested a more complex patch that only resolved the inner
checksum and offloaded the UDP layer. This didn't noticably improve
performance.
v2: Added IPv4 flag
---
 lib/dp-packet.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 305822293..df7bf8e6b 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -592,6 +592,18 @@ dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t 
flags)
 if (dp_packet_hwol_is_tunnel_geneve(p) ||
 dp_packet_hwol_is_tunnel_vxlan(p)) {
 tnl_inner = true;
+
+/* If the TX interface doesn't support UDP tunnel offload but does
+ * support inner checksum offload and an outer UDP checksum is
+ * required, then we can't offload inner checksum either. As that would
+ * invalidate the outer checksum. */
+if (!(flags & NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM) &&
+dp_packet_hwol_is_outer_udp_cksum(p)) {
+flags &= ~(NETDEV_TX_OFFLOAD_TCP_CKSUM |
+   NETDEV_TX_OFFLOAD_UDP_CKSUM |
+   NETDEV_TX_OFFLOAD_SCTP_CKSUM |
+   NETDEV_TX_OFFLOAD_IPV4_CKSUM);
+}
 }
 
 if (dp_packet_hwol_tx_ip_csum(p)) {
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3] conntrack: Fix flush not flushing all elements.

2024-03-04 Thread Mike Pattrick
On Mon, Mar 4, 2024 at 10:22 AM Xavier Simonart  wrote:
>
> On netdev datapath, when a ct element was cleaned, the cmap
> could be shrinked, potentially causing some elements to be skipped
> in the flush iteration.
>
> Fixes: 967bb5c5cd90 ("conntrack: Add rcu support.")
> Signed-off-by: Xavier Simonart 

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 1/6] netdev-offload: Fix null pointer dereference' warnings on dump creation.

2024-05-23 Thread Mike Pattrick
Clang's static analyzer will complain about a null pointer dereference
because dumps can be set to null and then there is a loop where it could
have been written to. This is a false positive, but only because the
netdev dpif type won't change during this loop.

Instead, return early from the netdev_ports_flow_dump_create function if
dumps is NULL.

Signed-off-by: Mike Pattrick 
---
 lib/netdev-offload.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
index 931d634e1..8a9d36555 100644
--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -626,8 +626,8 @@ netdev_ports_traverse(const char *dpif_type,
 struct netdev_flow_dump **
 netdev_ports_flow_dump_create(const char *dpif_type, int *ports, bool terse)
 {
+struct netdev_flow_dump **dumps = NULL;
 struct port_to_netdev_data *data;
-struct netdev_flow_dump **dumps;
 int count = 0;
 int i = 0;
 
@@ -638,7 +638,11 @@ netdev_ports_flow_dump_create(const char *dpif_type, int 
*ports, bool terse)
 }
 }
 
-dumps = count ? xzalloc(sizeof *dumps * count) : NULL;
+if (!count) {
+goto unlock;
+}
+
+dumps = xzalloc(sizeof *dumps * count);
 
 HMAP_FOR_EACH (data, portno_node, _to_netdev) {
 if (netdev_get_dpif_type(data->netdev) == dpif_type) {
@@ -650,6 +654,8 @@ netdev_ports_flow_dump_create(const char *dpif_type, int 
*ports, bool terse)
 i++;
 }
 }
+
+unlock:
 ovs_rwlock_unlock(_to_netdev_rwlock);
 
 *ports = i;
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 2/6] netdev-native-tnl: Fix use of uninitialized offset on SRv6 header pop.

2024-05-23 Thread Mike Pattrick
Clang's static analyzer will complain about uninitialized value 'hlen'
because we weren't properly checking the error code from a function that
would have initialized the value.

Instead, add a check for that return code.

Signed-off-by: Mike Pattrick 
Fixes: 03fc1ad78521 ("userspace: Add SRv6 tunnel support.")
---
 lib/netdev-native-tnl.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index dee9ab344..6bcc00d8c 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -1068,7 +1068,10 @@ netdev_srv6_pop_header(struct dp_packet *packet)
 }
 
 pkt_metadata_init_tnl(md);
-netdev_tnl_ip_extract_tnl_md(packet, tnl, );
+if (!netdev_tnl_ip_extract_tnl_md(packet, tnl, )) {
+goto err;
+}
+
 dp_packet_reset_packet(packet, hlen);
 
 return packet;
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 3/6] dpctl: Fix uninitialized value when deleting flows.

2024-05-23 Thread Mike Pattrick
Clang's static analyzer will complain about an uninitialized value
because we weren't setting a value for ufid_generated in all code paths.

Now we initialize this on declaration. This patch also corrects the
reverse x-mass of variable declaration.

Fixes: bbe2e3928747 ("dpctl: Fix broken flow deletion via ovs-dpctl due to 
missing ufid.")
Signed-off-by: Mike Pattrick 
---
 lib/dpctl.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/lib/dpctl.c b/lib/dpctl.c
index 3c555a559..a70df5342 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -1359,19 +1359,17 @@ static int
 dpctl_del_flow_dpif(struct dpif *dpif, const char *key_s,
 struct dpctl_params *dpctl_p)
 {
+struct dpif_port_dump port_dump;
 struct dpif_flow_stats stats;
+bool ufid_generated = false;
 struct dpif_port dpif_port;
-struct dpif_port_dump port_dump;
-struct ofpbuf key;
+bool ufid_present = false;
+struct simap port_names;
 struct ofpbuf mask; /* To be ignored. */
-
+struct ofpbuf key;
 ovs_u128 ufid;
-bool ufid_generated;
-bool ufid_present;
-struct simap port_names;
 int n, error;
 
-ufid_present = false;
 n = odp_ufid_from_string(key_s, );
 if (n < 0) {
 dpctl_error(dpctl_p, -n, "parsing flow ufid");
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 6/6] netdev-linux: Initialize link speed in error conditions.

2024-05-23 Thread Mike Pattrick
Clang's static analyzer noted that the output from
netdev_linux_get_speed_locked can be checked even if this function
doesn't set any values.

Now we always set those values to a sane default in all cases.

Fixes: 6240c0b4c80e ("netdev: Add netdev_get_speed() to netdev API.")
Signed-off-by: Mike Pattrick 
---
 lib/netdev-linux.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 8b855bdc4..83c19618c 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -2726,6 +2726,8 @@ netdev_linux_get_speed_locked(struct netdev_linux *netdev,
   uint32_t *current, uint32_t *max)
 {
 if (netdev_linux_netnsid_is_remote(netdev)) {
+*current = 0;
+*max = 0;
 return EOPNOTSUPP;
 }
 
@@ -2735,6 +2737,9 @@ netdev_linux_get_speed_locked(struct netdev_linux *netdev,
? 0 : netdev->current_speed;
 *max = MIN(UINT32_MAX,
netdev_features_to_bps(netdev->supported, 0) / 100ULL);
+} else {
+*current = 0;
+*max = 0;
 }
 return netdev->get_features_error;
 }
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 4/6] socket: Fix uninitialized values in inet_parse_ functions.

2024-05-23 Thread Mike Pattrick
Clang's static analyzer will complain about uninitialized value
dns_failure because we weren't setting a value for dns_failure in all
code paths.

Now we initialize this in the error conditions of inet_parse_passive and
inet_parse_active.

Fixes: 08e9e5337383 ("ovsdb: raft: Fix inability to read the database with DNS 
host names.")
Fixes: 5f219af8b3c7 ("ovsdb-server: Fix handling of DNS name for listener 
configuration.")
Signed-off-by: Mike Pattrick 
---
 lib/socket-util.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/lib/socket-util.c b/lib/socket-util.c
index 2d89fce85..c569b7d16 100644
--- a/lib/socket-util.c
+++ b/lib/socket-util.c
@@ -546,9 +546,15 @@ inet_parse_active(const char *target_, int default_port,
 if (!host) {
 VLOG_ERR("%s: host must be specified", target_);
 ok = false;
+if (dns_failure) {
+*dns_failure = false;
+}
 } else if (!port && default_port < 0) {
 VLOG_ERR("%s: port must be specified", target_);
 ok = false;
+if (dns_failure) {
+*dns_failure = false;
+}
 } else {
 ok = parse_sockaddr_components(ss, host, port, default_port,
target_, resolve_host, dns_failure);
@@ -671,6 +677,9 @@ inet_parse_passive(const char *target_, int default_port,
 if (!port && default_port < 0) {
 VLOG_ERR("%s: port must be specified", target_);
 ok = false;
+if (dns_failure) {
+*dns_failure = false;
+}
 } else {
 ok = parse_sockaddr_components(ss, host, port, default_port,
target_, resolve_host, dns_failure);
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 5/6] netdev-linux: Return an error if device feature names are empty.

2024-05-23 Thread Mike Pattrick
When retrieving a list of features supported by a network card, return
with an error code if the request completed without an error but the
list contains zero entries.

In practice this should never happen, but it does contribute to a
detection in Clang's static analyzer.

Fixes: 6c59c195266c ("netdev-linux: Use ethtool to detect offload support.")
Signed-off-by: Mike Pattrick 
---
 lib/netdev-linux.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 25349c605..8b855bdc4 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -2439,7 +2439,9 @@ netdev_linux_read_definitions(struct netdev_linux *netdev,
 int error = 0;
 
 error = netdev_linux_read_stringset_info(netdev, );
-if (error || !len) {
+if (!len) {
+return -EOPNOTSUPP;
+} else if (error) {
 return error;
 }
 strings = xzalloc(sizeof *strings + len * ETH_GSTRING_LEN);
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] fix ovs ipf crash

2024-05-21 Thread Mike Pattrick
On Mon, May 20, 2024 at 2:56 AM laixiangwu <15310488...@163.com> wrote:
>
> Description:
>
> when 1) The fragment timeout is between 15 seconds and 25 seconds; 2) The 
> ipf_list currently has received more than 32 fragments, and there are other 
> fragments of same big packet that have not been received.
>
> When the above two scenario conditions are met, due to exceeding the capacity 
> of the packet batch(here is 32), ipf_dp_packet_batch_add returns false, and 
> ipf_list will not be cleared. However, the 32 fragments packets added to the 
> packet batch will be processed normally. When receiving the subsequent 
> fragments of the ipf_list, because the first 32 fragments have been 
> processed, when processing subsequent fragment packets, relevant information 
> about the processed fragment packets will be read,therefore will occur carsh.
> One solution is do not forward timeout fragment packets from the above 
> scenarios, that is, do not add them to the packet batch, and handle other 
> scenarios according to the original logic.

Hello laixaingwu,

I happen to have a patch on the list right now for a similar sounding issue:

https://patchwork.ozlabs.org/project/openvswitch/patch/20240516153832.153496-1-...@redhat.com/

Do you happen to have a stack trace available for this crash? That
could help determine if the crash is triggered at the same location.

I'm not sure I like the approach of not adding any fragments to the
batch during expiration, as this changes how packets are processed
currently and may be an unexpected behaviour for end users. Would it
be better to only add a few to the current batch and add the remaining
to later batches?

Thanks,
M

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 4/6] socket: Fix uninitialized values in inet_parse_ functions.

2024-05-27 Thread Mike Pattrick
Clang's static analyzer will complain about uninitialized value
dns_failure because we weren't setting a value for dns_failure in all
code paths.

Now we initialize this in the error conditions of inet_parse_passive and
inet_parse_active.

Fixes: 08e9e5337383 ("ovsdb: raft: Fix inability to read the database with DNS 
host names.")
Fixes: 5f219af8b3c7 ("ovsdb-server: Fix handling of DNS name for listener 
configuration.")
Signed-off-by: Mike Pattrick 
---
 lib/socket-util.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/lib/socket-util.c b/lib/socket-util.c
index 2d89fce85..c569b7d16 100644
--- a/lib/socket-util.c
+++ b/lib/socket-util.c
@@ -546,9 +546,15 @@ inet_parse_active(const char *target_, int default_port,
 if (!host) {
 VLOG_ERR("%s: host must be specified", target_);
 ok = false;
+if (dns_failure) {
+*dns_failure = false;
+}
 } else if (!port && default_port < 0) {
 VLOG_ERR("%s: port must be specified", target_);
 ok = false;
+if (dns_failure) {
+*dns_failure = false;
+}
 } else {
 ok = parse_sockaddr_components(ss, host, port, default_port,
target_, resolve_host, dns_failure);
@@ -671,6 +677,9 @@ inet_parse_passive(const char *target_, int default_port,
 if (!port && default_port < 0) {
 VLOG_ERR("%s: port must be specified", target_);
 ok = false;
+if (dns_failure) {
+*dns_failure = false;
+}
 } else {
 ok = parse_sockaddr_components(ss, host, port, default_port,
target_, resolve_host, dns_failure);
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 2/6] netdev-native-tnl: Fix use of uninitialized offset on SRv6 header pop.

2024-05-27 Thread Mike Pattrick
Clang's static analyzer will complain about uninitialized value 'hlen'
because we weren't properly checking the error code from a function that
would have initialized the value.

Instead, add a check for that return code.

Fixes: 03fc1ad78521 ("userspace: Add SRv6 tunnel support.")
Signed-off-by: Mike Pattrick 
---
 lib/netdev-native-tnl.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index b21176037..d6f46ac4a 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -1067,7 +1067,10 @@ netdev_srv6_pop_header(struct dp_packet *packet)
 }
 
 pkt_metadata_init_tnl(md);
-netdev_tnl_ip_extract_tnl_md(packet, tnl, );
+if (!netdev_tnl_ip_extract_tnl_md(packet, tnl, )) {
+goto err;
+}
+
 dp_packet_reset_packet(packet, hlen);
 
 return packet;
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 3/6] dpctl: Fix uninitialized value when deleting flows.

2024-05-27 Thread Mike Pattrick
Clang's static analyzer will complain about an uninitialized value
because we weren't setting a value for ufid_generated in all code paths.

Now we initialize this on declaration. This patch also corrects the
reverse x-mass of variable declaration.

Fixes: bbe2e3928747 ("dpctl: Fix broken flow deletion via ovs-dpctl due to 
missing ufid.")
Signed-off-by: Mike Pattrick 
---
 lib/dpctl.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/lib/dpctl.c b/lib/dpctl.c
index 3c555a559..a70df5342 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -1359,19 +1359,17 @@ static int
 dpctl_del_flow_dpif(struct dpif *dpif, const char *key_s,
 struct dpctl_params *dpctl_p)
 {
+struct dpif_port_dump port_dump;
 struct dpif_flow_stats stats;
+bool ufid_generated = false;
 struct dpif_port dpif_port;
-struct dpif_port_dump port_dump;
-struct ofpbuf key;
+bool ufid_present = false;
+struct simap port_names;
 struct ofpbuf mask; /* To be ignored. */
-
+struct ofpbuf key;
 ovs_u128 ufid;
-bool ufid_generated;
-bool ufid_present;
-struct simap port_names;
 int n, error;
 
-ufid_present = false;
 n = odp_ufid_from_string(key_s, );
 if (n < 0) {
 dpctl_error(dpctl_p, -n, "parsing flow ufid");
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 0/6] clang: Fix Clang's static analyzer detections.

2024-05-27 Thread Mike Pattrick
Clang's static analyzer has identified several instances of uninitialized
variable usage and null pointer dereferences that - while not likely - are
possible. These mostly included making sure that a variable is properly set
or error code properly returned in every error condition.

Signed-off-by: Mike Pattrick 

Mike Pattrick (6):
  netdev-offload: Fix null pointer dereference warning on dump creation.
  netdev-native-tnl: Fix use of uninitialized offset on SRv6 header pop.
  dpctl: Fix uninitialized value when deleting flows.
  socket: Fix uninitialized values in inet_parse_ functions.
  netdev-linux: Return an error if device feature names are empty.
  netdev-linux: Initialize link speed in error conditions.

 lib/dpctl.c | 12 +---
 lib/netdev-linux.c  |  8 +++-
 lib/netdev-native-tnl.c |  5 -
 lib/netdev-offload.c| 10 --
 lib/socket-util.c   |  9 +
 5 files changed, 33 insertions(+), 11 deletions(-)

-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 1/6] netdev-offload: Fix null pointer dereference warning on dump creation.

2024-05-27 Thread Mike Pattrick
Clang's static analyzer will complain about a null pointer dereference
because dumps can be set to null and then there is a loop where it could
have been written to. This is a false positive, but only because the
netdev dpif type won't change during this loop.

Instead, return early from the netdev_ports_flow_dump_create function if
dumps is NULL.

Signed-off-by: Mike Pattrick 
---
 lib/netdev-offload.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
index 931d634e1..8a9d36555 100644
--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -626,8 +626,8 @@ netdev_ports_traverse(const char *dpif_type,
 struct netdev_flow_dump **
 netdev_ports_flow_dump_create(const char *dpif_type, int *ports, bool terse)
 {
+struct netdev_flow_dump **dumps = NULL;
 struct port_to_netdev_data *data;
-struct netdev_flow_dump **dumps;
 int count = 0;
 int i = 0;
 
@@ -638,7 +638,11 @@ netdev_ports_flow_dump_create(const char *dpif_type, int 
*ports, bool terse)
 }
 }
 
-dumps = count ? xzalloc(sizeof *dumps * count) : NULL;
+if (!count) {
+goto unlock;
+}
+
+dumps = xzalloc(sizeof *dumps * count);
 
 HMAP_FOR_EACH (data, portno_node, _to_netdev) {
 if (netdev_get_dpif_type(data->netdev) == dpif_type) {
@@ -650,6 +654,8 @@ netdev_ports_flow_dump_create(const char *dpif_type, int 
*ports, bool terse)
 i++;
 }
 }
+
+unlock:
 ovs_rwlock_unlock(_to_netdev_rwlock);
 
 *ports = i;
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 5/6] netdev-linux: Return an error if device feature names are empty.

2024-05-27 Thread Mike Pattrick
When retrieving a list of features supported by a network card, return
with an error code if the request completed without an error but the
list contains zero entries.

In practice this should never happen, but it does contribute to a
detection in Clang's static analyzer.

Fixes: 6c59c195266c ("netdev-linux: Use ethtool to detect offload support.")
Signed-off-by: Mike Pattrick 
---
 lib/netdev-linux.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 25349c605..c89a85a38 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -2439,9 +2439,12 @@ netdev_linux_read_definitions(struct netdev_linux 
*netdev,
 int error = 0;
 
 error = netdev_linux_read_stringset_info(netdev, );
-if (error || !len) {
+if (error) {
 return error;
+} else if (!len) {
+return -EOPNOTSUPP;
 }
+
 strings = xzalloc(sizeof *strings + len * ETH_GSTRING_LEN);
 
 strings->cmd = ETHTOOL_GSTRINGS;
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 6/6] netdev-linux: Initialize link speed in error conditions.

2024-05-27 Thread Mike Pattrick
Clang's static analyzer noted that the output from
netdev_linux_get_speed_locked can be checked even if this function
doesn't set any values.

Now we always set those values to a sane default in all cases.

Fixes: b8f8fad86435 ("netdev-linux: Use speed as max rate in tc classes.")
Signed-off-by: Mike Pattrick 
---
 lib/netdev-linux.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index c89a85a38..ff5e94856 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -2727,6 +2727,7 @@ netdev_linux_get_speed_locked(struct netdev_linux *netdev,
   uint32_t *current, uint32_t *max)
 {
 if (netdev_linux_netnsid_is_remote(netdev)) {
+*current = *max = 0;
 return EOPNOTSUPP;
 }
 
@@ -2736,6 +2737,8 @@ netdev_linux_get_speed_locked(struct netdev_linux *netdev,
? 0 : netdev->current_speed;
 *max = MIN(UINT32_MAX,
netdev_features_to_bps(netdev->supported, 0) / 100ULL);
+} else {
+*current = *max = 0;
 }
 return netdev->get_features_error;
 }
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 1/8] netdev-linux: Fix possible int overflow in tc_add_matchall_policer().

2024-05-28 Thread Mike Pattrick
On Tue, May 28, 2024 at 7:46 AM Eelco Chaudron  wrote:
>
> Fix unintentional integer overflow reported by Coverity by adding
> the ULL suffix to the numerical literals used in the multiplications.
>
> Fixes: ed2300cca0d3 ("netdev-linux: Refactor put police action netlink 
> message")
> Signed-off-by: Eelco Chaudron 

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 3/8] sflow: Replace libc's random() function with the OVS's random_range().

2024-05-28 Thread Mike Pattrick
On Tue, May 28, 2024 at 7:46 AM Eelco Chaudron  wrote:
>
> Coverity has flagged the use of a potentially unsafe function.
> Although this is not a concern in this case since it's not used for
> encryption, we should replace it with the OVS implementation to
> achieve better randomness.
>
> Fixes: c72e245a0e2c ("Add InMon's sFlow Agent library to the build system.")
> Signed-off-by: Eelco Chaudron 

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 4/8] sflow: Use uint32_t instead of time_t for tick handling in the poller.

2024-05-28 Thread Mike Pattrick
On Tue, May 28, 2024 at 7:46 AM Eelco Chaudron  wrote:
>
> The sFlow library uses a uint32_t to configure timeout ticks, but
> stores this value as a time_t. Although this doesn't cause functional
> issues, it wastes space and confuses Coverity, potentially indicating
> a Y2K38 problem when storing uint32_t values in time_t. This patch
> updates the internal data structures to use uint32_t variables.
>
> Fixes: c72e245a0e2c ("Add InMon's sFlow Agent library to the build system.")
> Signed-off-by: Eelco Chaudron 
> ---

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 2/8] cfm: Fix possible integer overflow in tc_add_matchall_policer().

2024-05-28 Thread Mike Pattrick
On Tue, May 28, 2024 at 7:46 AM Eelco Chaudron  wrote:
>
> Fix unintentional integer overflow reported by Coverity by adding
> the LL suffix to the numerical literals used in the multiplication.
>
> Fixes: 5767a79a4059 ("cfm: Require ccm received in demand mode.")
> Signed-off-by: Eelco Chaudron 

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 6/6] netdev-linux: Initialize link speed in error conditions.

2024-05-28 Thread Mike Pattrick
On Tue, May 28, 2024 at 4:26 PM Ilya Maximets  wrote:
>
> On 5/27/24 21:08, Mike Pattrick wrote:
> > Clang's static analyzer noted that the output from
> > netdev_linux_get_speed_locked can be checked even if this function
> > doesn't set any values.
> >
> > Now we always set those values to a sane default in all cases.
> >
> > Fixes: b8f8fad86435 ("netdev-linux: Use speed as max rate in tc classes.")
>
> This is still an incorrect Fixes tag.  The correct one is:
>
> Fixes: 19cffe30cfda ("netdev-linux: Avoid deadlock in netdev_get_speed.")

Sorry about that, I thought I had corrected it when resubmitting, but
it looks like this slipped through.

Cheers,
M

>
> The original netdev_get_speed() call was fine, because it ensures that values
> are zeroed out even on errors.  That is defined in netdev-provider API.  But
> the new static netdev_linux_get_speed_locked() function didn't do the same.
>
> I fixed that and applied the set.  Individual patches backported according to
> their Fixes tags.  Thanks!
>
> Best regards, Ilya Maximets.
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/1] ofproto-dpif-upcall: Avoid stale ukeys leaks.

2024-05-29 Thread Mike Pattrick
On Thu, May 23, 2024 at 6:47 AM Roi Dayan via dev
 wrote:
>
> It is observed in some environments that there are much more ukeys than
> actual DP flows. For example:
>
> $ ovs-appctl upcall/show
> system@ovs-system:
> flows : (current 7) (avg 6) (max 117) (limit 2125)
> offloaded flows : 525
> dump duration : 1063ms
> ufid enabled : true
>
> 23: (keys 3612)
> 24: (keys 3625)
> 25: (keys 3485)
>
> The revalidator threads are busy revalidating the stale ukeys leading to
> high CPU and long dump duration.
>
> There are some possible situations that may result in stale ukeys that
> have no corresponding DP flows.
>
> In revalidator, push_dp_ops() doesn't check error if the op type is not
> DEL. It is possible that a PUT(MODIFY) fails, especially for tc offload
> case, where the old flow is deleted first and then the new one is
> created. If the creation fails, the ukey will be stale (no corresponding
> DP flow). This patch adds a warning in such case.
>
> Another possible scenario is in handle_upcalls() if a PUT operation did
> not succeed and op->error attribute was not set correctly it can lead to
> stale ukey in operational state.
>
> This patch adds checks in the sweep phase for such ukeys and move them
> to DELETE so that they can be cleared eventually.
>
> Co-authored-by: Han Zhou 
> Signed-off-by: Han Zhou 
> Signed-off-by: Roi Dayan 
> ---
>  ofproto/ofproto-dpif-upcall.c | 15 +++
>  1 file changed, 15 insertions(+)
>
> diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
> index 83609ec62b63..e9520ebdf910 100644
> --- a/ofproto/ofproto-dpif-upcall.c
> +++ b/ofproto/ofproto-dpif-upcall.c
> @@ -57,6 +57,7 @@ COVERAGE_DEFINE(dumped_inconsistent_flow);
>  COVERAGE_DEFINE(dumped_new_flow);
>  COVERAGE_DEFINE(handler_duplicate_upcall);
>  COVERAGE_DEFINE(revalidate_missed_dp_flow);
> +COVERAGE_DEFINE(revalidate_missed_dp_flow_del);
>  COVERAGE_DEFINE(ukey_dp_change);
>  COVERAGE_DEFINE(ukey_invalid_stat_reset);
>  COVERAGE_DEFINE(ukey_replace_contention);
> @@ -278,6 +279,7 @@ enum flow_del_reason {
>  FDR_BAD_ODP_FIT,/* Bad ODP flow fit. */
>  FDR_FLOW_IDLE,  /* Flow idle timeout. */
>  FDR_FLOW_LIMIT, /* Kill all flows condition reached. */
> +FDR_FLOW_STALE, /* Flow stale detected. */
>  FDR_FLOW_WILDCARDED,/* Flow needs a narrower wildcard mask. */
>  FDR_NO_OFPROTO, /* Bridge not found. */
>  FDR_PURGE,  /* User requested flow deletion. */
> @@ -2557,6 +2559,10 @@ push_dp_ops(struct udpif *udpif, struct ukey_op *ops, 
> size_t n_ops)
>
>  if (op->dop.type != DPIF_OP_FLOW_DEL) {
>  /* Only deleted flows need their stats pushed. */
> +if (op->dop.error) {
> +VLOG_WARN_RL(, "push_dp_ops: error %d in op type %d, ukey 
> "
> + "%p", op->dop.error, op->dop.type, op->ukey);
> +}
>  continue;
>  }
>
> @@ -3027,6 +3033,15 @@ revalidator_sweep__(struct revalidator *revalidator, 
> bool purge)
>  del_reason = purge ? FDR_PURGE : FDR_UPDATE_FAIL;
>  } else if (!seq_mismatch) {
>  result = UKEY_KEEP;
> +} else if (!ukey->stats.used &&

Would it be possible for stats.used to be set but the dp flow to be
deleted? For example, if a flow is offloaded to TC, but something
external to OVS clears it?


Thanks,
Mike

> +   udpif_flow_time_delta(udpif, ukey) * 1000 >
> +   ofproto_max_idle) {
> +COVERAGE_INC(revalidate_missed_dp_flow_del);
> +VLOG_WARN_RL(, "revalidator_sweep__: Remove stale 
> ukey "
> + "%p delta %llus", ukey,
> + udpif_flow_time_delta(udpif, ukey));
> +result = UKEY_DELETE;
> +del_reason = FDR_FLOW_STALE;
>  } else {
>  struct dpif_flow_stats stats;
>  COVERAGE_INC(revalidate_missed_dp_flow);
> --
> 2.21.0
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH]ipf: Fix ovs ipf crash.

2024-05-22 Thread Mike Pattrick
Hello laixaingwu,

I happen to have a patch on the list right now for a similar sounding issue:

https://patchwork.ozlabs.org/project/openvswitch/patch/20240516153832.153496-1-...@redhat.com/

Do you happen to have a stack trace available for this crash? That
could help determine if the crash is triggered at the same location.

I'm not sure I like the approach of not adding any fragments to the
batch during expiration, as this changes how packets are processed
currently and may be an unexpected behaviour for end users. Would it
be better to only add a few to the current batch and add the remaining
to later batches?

Thanks,
M



On Tue, May 21, 2024 at 10:20 PM laixiangwu <15310488...@163.com> wrote:
>
> Description:
>
> when 1) The fragment timeout is between 15 seconds and 25 seconds; 2)
> The ipf_list currently has received more than 32 fragments, and there
> are other fragments of same big packet that have not been received.
>
> When the above two scenario conditions are met, due to exceeding the
> capacity of the packet batch(here is 32), ipf_dp_packet_batch_add
> returns false, and ipf_list will not be cleared. However, the 32
> fragments packets added to the packet batch will be processed normally.
> When receiving the subsequent fragments of the ipf_list, because the
> first 32 fragments have been processed, when processing subsequent
> fragment packets, relevant information about the processed fragment
> packets will be read,therefore will occur carsh.
> One solution is do not forward timeout fragment packets from the above
> scenarios, that is, do not add them to the packet batch, and handle
> other scenarios according to the original logic.
> ---
>  lib/ipf.c | 10 --
>  1 file changed, 4 insertions(+), 6 deletions(-)
>
> diff --git a/lib/ipf.c b/lib/ipf.c
> index d45266374..9258173ab 100644
> --- a/lib/ipf.c
> +++ b/lib/ipf.c
> @@ -1011,7 +1011,7 @@ ipf_purge_list_check(struct ipf *ipf, struct ipf_list 
> *ipf_list,
>  }
>
>  /* Does the packet batch management and common accounting work associated
> - * with 'ipf_send_completed_frags()' and 'ipf_send_expired_frags()'. */
> + * with 'ipf_send_completed_frags()'. */
>  static bool
>  ipf_send_frags_in_list(struct ipf *ipf, struct ipf_list *ipf_list,
> struct dp_packet_batch *pb,
> @@ -1076,8 +1076,7 @@ ipf_send_completed_frags(struct ipf *ipf, struct 
> dp_packet_batch *pb,
>   * a packet batch to be processed by the calling application, typically
>   * conntrack. Also cleans up the list context when it is empty.*/
>  static void
> -ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb,
> -   long long now, bool v6)
> +ipf_clean_expired_frags(struct ipf *ipf, long long now)
>  {
>  enum {
>  /* Very conservative, due to DOS probability. */
> @@ -1099,8 +1098,7 @@ ipf_send_expired_frags(struct ipf *ipf, struct 
> dp_packet_batch *pb,
>  break;
>  }
>
> -if (ipf_send_frags_in_list(ipf, ipf_list, pb, IPF_FRAG_EXPIRY_LIST,
> -   v6, now)) {
> +if (ipf_purge_list_check(ipf, ipf_list, now)) {
>  ipf_expiry_list_clean(>frag_lists, ipf_list);
>  lists_removed++;
>  } else {
> @@ -1249,7 +1247,7 @@ ipf_postprocess_conntrack(struct ipf *ipf, struct 
> dp_packet_batch *pb,
>  bool v6 = dl_type == htons(ETH_TYPE_IPV6);
>  ipf_post_execute_reass_pkts(ipf, pb, v6);
>  ipf_send_completed_frags(ipf, pb, now, v6);
> -ipf_send_expired_frags(ipf, pb, now, v6);
> +ipf_clean_expired_frags(ipf, now);
>  }
>  }
>
> --
> 2.31.1.windows.1
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] python: idl: Fix index not being updated on row modification.

2024-06-05 Thread Mike Pattrick
On Mon, May 27, 2024 at 5:39 PM Ilya Maximets  wrote:
>
> When a row is modified, python IDL doesn't perform any operations on
> existing client-side indexes.  This means that if the column on which
> index is created changes, the old value will remain in the index and
> the new one will not be added to the index.  Beside lookup failures
> this is also causing inability to remove modified rows, because the
> new column value doesn't exist in the index causing an exception on
> attempt to remove it:
>
>  Traceback (most recent call last):
>File "ovsdbapp/backend/ovs_idl/connection.py", line 110, in run
>  self.idl.run()
>File "ovs/db/idl.py", line 465, in run
>  self.__parse_update(msg.params[2], OVSDB_UPDATE3)
>File "ovs/db/idl.py", line 924, in __parse_update
>  self.__do_parse_update(update, version, self.tables)
>File "ovs/db/idl.py", line 964, in __do_parse_update
>  changes = self.__process_update2(table, uuid, row_update)
>File "ovs/db/idl.py", line 991, in __process_update2
>  del table.rows[uuid]
>File "ovs/db/custom_index.py", line 102, in __delitem__
>  index.remove(val)
>File "ovs/db/custom_index.py", line 66, in remove
>  self.values.remove(self.index_entry_from_row(row))
>File "sortedcontainers/sortedlist.py", line 2015, in remove
>  raise ValueError('{0!r} not in list'.format(value))
>  ValueError: Datapath_Binding(
>uuid=UUID('498e66a2-70bc-4587-a66f-0433baf82f60'),
>tunnel_key=16711683, load_balancers=[], external_ids={}) not in list
>
> Fix that by always removing an existing row from indexes before
> modification and adding back afterwards.  This ensures that old
> values are removed from the index and new ones are added.
>
> This behavior is consistent with the C implementation.
>
> The new test that reproduces the removal issue is added.  Some extra
> testing infrastructure added to be able to handle and print out the
> 'indexed' table from the idltest schema.
>
> Fixes: 13973bc41524 ("Add multi-column index support for the Python IDL")
> Reported-at: 
> https://mail.openvswitch.org/pipermail/ovs-discuss/2024-May/053159.html
> Reported-by: Roberto Bartzen Acosta 
> Signed-off-by: Ilya Maximets 

I've tested this a bit and it seems like a reasonable solution.

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] checkpatch: Extend and move extra_keywords list to file.

2024-06-05 Thread Mike Pattrick
This patch extends the extra_keywords list from 324 to 747 keywords and
moves this list to a separate file. The methodology used to create this
list was running the spell checker on a large volume of historical
patches and selecting any words that appeared multiple times.

The rational for using a separate file is to make management of this
list simpler by decoupling the code from the keywords.

Signed-off-by: Mike Pattrick 
---
 utilities/checkpatch.py  |  67 +---
 utilities/extra_keywords | 747 +++
 2 files changed, 750 insertions(+), 64 deletions(-)
 create mode 100644 utilities/extra_keywords

diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py
index 6b293770d..08b5870d3 100755
--- a/utilities/checkpatch.py
+++ b/utilities/checkpatch.py
@@ -49,70 +49,9 @@ def open_spell_check_dict():
 codespell_file = ''
 
 try:
-extra_keywords = ['ovs', 'vswitch', 'vswitchd', 'ovs-vswitchd',
-  'netdev', 'selinux', 'ovs-ctl', 'dpctl', 'ofctl',
-  'openvswitch', 'dpdk', 'hugepage', 'hugepages',
-  'pmd', 'upcall', 'vhost', 'rx', 'tx', 'vhostuser',
-  'openflow', 'qsort', 'rxq', 'txq', 'perf', 'stats',
-  'struct', 'int', 'char', 'bool', 'upcalls', 'nicira',
-  'bitmask', 'ipv4', 'ipv6', 'tcp', 'tcp4', 'tcpv4',
-  'udp', 'udp4', 'udpv4', 'icmp', 'icmp4', 'icmpv6',
-  'vlan', 'vxlan', 'cksum', 'csum', 'checksum',
-  'ofproto', 'numa', 'mempool', 'mempools', 'mbuf',
-  'mbufs', 'hmap', 'cmap', 'smap', 'dhcpv4', 'dhcp',
-  'dhcpv6', 'opts', 'metadata', 'geneve', 'mutex',
-  'netdev', 'netdevs', 'subtable', 'virtio', 'qos',
-  'policer', 'datapath', 'tunctl', 'attr', 'ethernet',
-  'ether', 'defrag', 'defragment', 'loopback', 'sflow',
-  'acl', 'initializer', 'recirc', 'xlated', 'unclosed',
-  'netlink', 'msec', 'usec', 'nsec', 'ms', 'us', 'ns',
-  'kilobits', 'kbps', 'kilobytes', 'megabytes', 'mbps',
-  'gigabytes', 'gbps', 'megabits', 'gigabits', 'pkts',
-  'tuple', 'miniflow', 'megaflow', 'conntrack',
-  'vlans', 'vxlans', 'arg', 'tpid', 'xbundle',
-  'xbundles', 'mbundle', 'mbundles', 'netflow',
-  'localnet', 'odp', 'pre', 'dst', 'dest', 'src',
-  'ethertype', 'cvlan', 'ips', 'msg', 'msgs',
-  'liveness', 'userspace', 'eventmask', 'datapaths',
-  'slowpath', 'fastpath', 'multicast', 'unicast',
-  'revalidation', 'namespace', 'qdisc', 'uuid',
-  'ofport', 'subnet', 'revalidation', 'revalidator',
-  'revalidate', 'l2', 'l3', 'l4', 'openssl', 'mtu',
-  'ifindex', 'enum', 'enums', 'http', 'https', 'num',
-  'vconn', 'vconns', 'conn', 'nat', 'memset', 'memcmp',
-  'strcmp', 'strcasecmp', 'tc', 'ufid', 'api',
-  'ofpbuf', 'ofpbufs', 'hashmaps', 'hashmap', 'deref',
-  'dereference', 'hw', 'prio', 'sendmmsg', 'sendmsg',
-  'malloc', 'free', 'alloc', 'pid', 'ppid', 'pgid',
-  'uid', 'gid', 'sid', 'utime', 'stime', 'cutime',
-  'cstime', 'vsize', 'rss', 'rsslim', 'whcan', 'gtime',
-  'eip', 'rip', 'cgtime', 'dbg', 'gw', 'sbrec', 'bfd',
-  'sizeof', 'pmds', 'nic', 'nics', 'hwol', 'encap',
-  'decap', 'tlv', 'tlvs', 'decapsulation', 'fd',
-  'cacheline', 'xlate', 'skiplist', 'idl',
-  'comparator', 'natting', 'alg', 'pasv', 'epasv',
-  'wildcard', 'nated', 'amd64', 'x86_64',
-  'recirculation', 'linux', 'afxdp', 'promisc', 'goto',
-  'misconfigured', 'misconfiguration', 'checkpatch',
-  'debian', 'travis', 'cirrus', 'appveyor', 'faq',
-  'erspan', 'const', 'hotplug', 'addresssanitizer',
-  'ovsdb', 'dpif', 'veth', 'rhel', 'jsonrpc', 'json',
-  'syscall', 'lacp', 'ipf', 'skb', 'valgrind',
-  'appctl', 'arp', 'asan', 'backport', 'backtrace',
-  'chmod', 'ci', 'cpu', 'cpus', 'dnat', 'dns', 'dpcls',
-  'eol', 'ethtool', 'fdb', 'freebsd', 'gcc', 'github',
-  'glibc', 'gre', 'inlined', 'ip', 'ipfix', 'ipsec',
-  'ixgbe

[ovs-dev] [PATCH v2] checkpatch: Extend and move extra_keywords list to file.

2024-06-05 Thread Mike Pattrick
This patch extends the extra_keywords list from 324 to 747 keywords and
moves this list to a separate file. The methodology used to create this
list was running the spell checker on a large volume of historical
patches and selecting any words that appeared multiple times.

The rational for using a separate file is to make management of this
list simpler by decoupling the code from the keywords.

Signed-off-by: Mike Pattrick 
---
v2: Included new file in distfiles
---
 utilities/automake.mk|   1 +
 utilities/checkpatch.py  |  67 +---
 utilities/extra_keywords | 747 +++
 3 files changed, 751 insertions(+), 64 deletions(-)
 create mode 100644 utilities/extra_keywords

diff --git a/utilities/automake.mk b/utilities/automake.mk
index 146b8c37f..3f14c0fef 100644
--- a/utilities/automake.mk
+++ b/utilities/automake.mk
@@ -65,6 +65,7 @@ EXTRA_DIST += \
utilities/ovs-vlan-test.in \
utilities/ovs-vsctl-bashcomp.bash \
utilities/checkpatch.py \
+   utilities/extra_keywords \
utilities/docker/Makefile \
utilities/docker/ovs-override.conf \
utilities/docker/start-ovs \
diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py
index 6b293770d..08b5870d3 100755
--- a/utilities/checkpatch.py
+++ b/utilities/checkpatch.py
@@ -49,70 +49,9 @@ def open_spell_check_dict():
 codespell_file = ''
 
 try:
-extra_keywords = ['ovs', 'vswitch', 'vswitchd', 'ovs-vswitchd',
-  'netdev', 'selinux', 'ovs-ctl', 'dpctl', 'ofctl',
-  'openvswitch', 'dpdk', 'hugepage', 'hugepages',
-  'pmd', 'upcall', 'vhost', 'rx', 'tx', 'vhostuser',
-  'openflow', 'qsort', 'rxq', 'txq', 'perf', 'stats',
-  'struct', 'int', 'char', 'bool', 'upcalls', 'nicira',
-  'bitmask', 'ipv4', 'ipv6', 'tcp', 'tcp4', 'tcpv4',
-  'udp', 'udp4', 'udpv4', 'icmp', 'icmp4', 'icmpv6',
-  'vlan', 'vxlan', 'cksum', 'csum', 'checksum',
-  'ofproto', 'numa', 'mempool', 'mempools', 'mbuf',
-  'mbufs', 'hmap', 'cmap', 'smap', 'dhcpv4', 'dhcp',
-  'dhcpv6', 'opts', 'metadata', 'geneve', 'mutex',
-  'netdev', 'netdevs', 'subtable', 'virtio', 'qos',
-  'policer', 'datapath', 'tunctl', 'attr', 'ethernet',
-  'ether', 'defrag', 'defragment', 'loopback', 'sflow',
-  'acl', 'initializer', 'recirc', 'xlated', 'unclosed',
-  'netlink', 'msec', 'usec', 'nsec', 'ms', 'us', 'ns',
-  'kilobits', 'kbps', 'kilobytes', 'megabytes', 'mbps',
-  'gigabytes', 'gbps', 'megabits', 'gigabits', 'pkts',
-  'tuple', 'miniflow', 'megaflow', 'conntrack',
-  'vlans', 'vxlans', 'arg', 'tpid', 'xbundle',
-  'xbundles', 'mbundle', 'mbundles', 'netflow',
-  'localnet', 'odp', 'pre', 'dst', 'dest', 'src',
-  'ethertype', 'cvlan', 'ips', 'msg', 'msgs',
-  'liveness', 'userspace', 'eventmask', 'datapaths',
-  'slowpath', 'fastpath', 'multicast', 'unicast',
-  'revalidation', 'namespace', 'qdisc', 'uuid',
-  'ofport', 'subnet', 'revalidation', 'revalidator',
-  'revalidate', 'l2', 'l3', 'l4', 'openssl', 'mtu',
-  'ifindex', 'enum', 'enums', 'http', 'https', 'num',
-  'vconn', 'vconns', 'conn', 'nat', 'memset', 'memcmp',
-  'strcmp', 'strcasecmp', 'tc', 'ufid', 'api',
-  'ofpbuf', 'ofpbufs', 'hashmaps', 'hashmap', 'deref',
-  'dereference', 'hw', 'prio', 'sendmmsg', 'sendmsg',
-  'malloc', 'free', 'alloc', 'pid', 'ppid', 'pgid',
-  'uid', 'gid', 'sid', 'utime', 'stime', 'cutime',
-  'cstime', 'vsize', 'rss', 'rsslim', 'whcan', 'gtime',
-  'eip', 'rip', 'cgtime', 'dbg', 'gw', 'sbrec', 'bfd',
-  'sizeof', 'pmds', 'nic', 'nics', 'hwol', 'encap',
-  'decap', 'tlv', 'tlvs', 'decapsulation', 'fd',
-  'cacheline', 'xlate', 'skiplist', 'idl',
-  'comparator', 'natting', 'alg', 'pasv', 'epasv',
-  'wildcard', 'nated', 'amd64', 'x86_64',
-  'recirculation', 'linux', 'afxdp', 'promisc', 'goto',
-  'misconfigured', 'misconfiguration', 'checkpatch',
-  'debian', 'travis', 'cirrus', 'appveyor', 'faq',
-  'erspan', 'const', 'hotplug

Re: [ovs-dev] [PATCH v2] ovsdb: Use table indexes if available for ovsdb_query().

2024-06-03 Thread Mike Pattrick
On Mon, Jun 3, 2024 at 9:56 AM Simon Horman  wrote:
>
> On Mon, Jun 03, 2024 at 12:20:36AM -0400, Mike Pattrick wrote:
> > Currently all OVSDB database queries except for UUID lookups all result
> > in linear lookups over the entire table, even if an index is present.
> >
> > This patch modifies ovsdb_query() to attempt an index lookup first, if
> > possible. If no matching indexes are present then a linear index is
> > still conducted.
> >
> > Reported-at: https://issues.redhat.com/browse/FDP-590
> > Signed-off-by: Mike Pattrick 
>
> ...
>
> > diff --git a/ovsdb/query.c b/ovsdb/query.c
> > index eebe56412..e3e50a034 100644
> > --- a/ovsdb/query.c
> > +++ b/ovsdb/query.c
> > @@ -21,32 +21,116 @@
> >  #include "condition.h"
> >  #include "row.h"
> >  #include "table.h"
> > +#include "transaction.h"
> > +
> > +static bool
> > +ovsdb_query_index(struct ovsdb_table *table,
> > +  const struct ovsdb_condition *cnd,
> > +  const struct ovsdb_row **out)
> > +{
> > +for (size_t idx = 0; idx < table->schema->n_indexes; idx++) {
> > +const struct ovsdb_column_set *index = 
> > >schema->indexes[idx];
> > +struct hmap_node *node;
> > +size_t matches = 0;
> > +uint32_t hash = 0;
> > +
> > +if (index->n_columns != cnd->n_clauses) {
> > +continue;
> > +}
> > +
> > +/* The conditions may not be in the same order as the index. */
> > +for (size_t c = 0; c < cnd->n_clauses; c++) {
> > +const struct ovsdb_clause *cnd_cls = >clauses[c];
> > +
> > +if (cnd_cls->function != OVSDB_F_EQ) {
> > +return false;
> > +}
> > +
> > +for (size_t i = 0; i < index->n_columns; i++) {
> > +const struct ovsdb_column *idx_col = index->columns[i];
> > +
> > +if (cnd_cls->index == idx_col->index) {
> > +hash = ovsdb_datum_hash(_cls->arg, _col->type,
> > +hash);
> > +matches++;
> > +break;
> > +}
> > +}
> > +
> > +/* If none of the indexed columns match, continue to the next
> > + * index. */
> > +if (matches == c) {
> > +break;
> > +}
> > +}
> > +
> > +if (matches != cnd->n_clauses) {
> > +continue;
> > +}
> > +
> > +for (node = hmap_first_with_hash(>indexes[idx], hash); node;
> > + node = hmap_next_with_hash(node)) {
> > +struct ovsdb_row *irow = ovsdb_row_from_index_node(node, table,
> > +   idx);
> > +
> > +for (size_t c = 0; c < cnd->n_clauses; c++) {
> > +const struct ovsdb_clause *cnd_cls = >clauses[c];
> > +
> > +if (!ovsdb_datum_equals(_cls->arg,
> > +>fields[cnd_cls->index],
> > +_cls->column->type)) {
> > +irow = NULL;
> > +break;
> > +}
> > +}
> > +
> > +if (irow) {
> > +*out = irow;
> > +return true;
> > +}
> > +}
> > +
> > +/* In the case that there was a matching index but no matching 
> > row, the
> > + * index check is still considered to be a success. */
> > +return true;
>
> Hi Mike,
>
> Maybe I misread it, but it seems that the code above implements:
> 1. If a row is found, return true
> 2. Otherwise, returns true
>
> If so, then is there a need 1?

That's true, I could just break out of the loop. Github-ci also
pointed out another issue with the patch that I'm working on. When I
resubmit I'll correct this too.

-M

>
> > +}
> > +return false;
> > +}
>
> ...
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] checkpatch: Extend and move extra_keywords list to file.

2024-06-07 Thread Mike Pattrick
On Fri, Jun 7, 2024 at 2:35 AM Eelco Chaudron  wrote:
>
>
>
> On 6 Jun 2024, at 3:07, Mike Pattrick wrote:
>
> > This patch extends the extra_keywords list from 324 to 747 keywords and
> > moves this list to a separate file. The methodology used to create this
> > list was running the spell checker on a large volume of historical
> > patches and selecting any words that appeared multiple times.
>
> Thanks Mike,
>
> I like the idea of having this in a separate file (I would add the .txt 
> extension to it), however, just blindly taking the last x errors does not 
> seem to be the right approach.
>
> Last time I took the words from the last 1000 commits that made sense. For 
> example, things like countersfn, deviceiocontrol, etc. do not make sense to 
> me to add.

Why wouldn't we want something like deviceiocontrol in an exclusion
list? It's a common Windows function name, any commit that touches the
windows code has a high likelihood of including it.

-M

>
> //Eelco
>
> > The rational for using a separate file is to make management of this
> > list simpler by decoupling the code from the keywords.
> >
> > Signed-off-by: Mike Pattrick 
> > ---
> > v2: Included new file in distfiles
> > ---
> >  utilities/automake.mk|   1 +
> >  utilities/checkpatch.py  |  67 +---
> >  utilities/extra_keywords | 747 +++
> >  3 files changed, 751 insertions(+), 64 deletions(-)
> >  create mode 100644 utilities/extra_keywords
> >
> > diff --git a/utilities/automake.mk b/utilities/automake.mk
> > index 146b8c37f..3f14c0fef 100644
> > --- a/utilities/automake.mk
> > +++ b/utilities/automake.mk
> > @@ -65,6 +65,7 @@ EXTRA_DIST += \
> >   utilities/ovs-vlan-test.in \
> >   utilities/ovs-vsctl-bashcomp.bash \
> >   utilities/checkpatch.py \
> > + utilities/extra_keywords \
> >   utilities/docker/Makefile \
> >   utilities/docker/ovs-override.conf \
> >   utilities/docker/start-ovs \
> > diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py
> > index 6b293770d..08b5870d3 100755
> > --- a/utilities/checkpatch.py
> > +++ b/utilities/checkpatch.py
> > @@ -49,70 +49,9 @@ def open_spell_check_dict():
> >  codespell_file = ''
> >
> >  try:
> > -extra_keywords = ['ovs', 'vswitch', 'vswitchd', 'ovs-vswitchd',
> > -  'netdev', 'selinux', 'ovs-ctl', 'dpctl', 'ofctl',
> > -  'openvswitch', 'dpdk', 'hugepage', 'hugepages',
> > -  'pmd', 'upcall', 'vhost', 'rx', 'tx', 
> > 'vhostuser',
> > -  'openflow', 'qsort', 'rxq', 'txq', 'perf', 
> > 'stats',
> > -  'struct', 'int', 'char', 'bool', 'upcalls', 
> > 'nicira',
> > -  'bitmask', 'ipv4', 'ipv6', 'tcp', 'tcp4', 
> > 'tcpv4',
> > -  'udp', 'udp4', 'udpv4', 'icmp', 'icmp4', 
> > 'icmpv6',
> > -  'vlan', 'vxlan', 'cksum', 'csum', 'checksum',
> > -  'ofproto', 'numa', 'mempool', 'mempools', 'mbuf',
> > -  'mbufs', 'hmap', 'cmap', 'smap', 'dhcpv4', 
> > 'dhcp',
> > -  'dhcpv6', 'opts', 'metadata', 'geneve', 'mutex',
> > -  'netdev', 'netdevs', 'subtable', 'virtio', 'qos',
> > -  'policer', 'datapath', 'tunctl', 'attr', 
> > 'ethernet',
> > -  'ether', 'defrag', 'defragment', 'loopback', 
> > 'sflow',
> > -  'acl', 'initializer', 'recirc', 'xlated', 
> > 'unclosed',
> > -  'netlink', 'msec', 'usec', 'nsec', 'ms', 'us', 
> > 'ns',
> > -  'kilobits', 'kbps', 'kilobytes', 'megabytes', 
> > 'mbps',
> > -  'gigabytes', 'gbps', 'megabits', 'gigabits', 
> > 'pkts',
> > -  'tuple', 'miniflow', 'megaflow', 'conntrack',
> > -  'vlans', 'vxlans', 'arg', 'tpid', 'xbundle',
> > -  'xbundles', 'mbundle', 'mbundles', 'netflow',
> > -  'localnet', 'odp', 'pre', 'dst', 'dest', 'src',
> > -  'ethertype', 'cvlan', 'ips', 'msg', 'msgs',
> > -  'liveness', 'userspace', 'eventmask', 
> > 'datapaths',
> > -  'slowpath', 'fastpath', 'multicast', 'unicast',
> > -  'revalidation', 'namespace', 'qdisc', 

Re: [ovs-dev] [PATCH v2 6/8] ofproto-dpif: Define age as time_t in ofproto_unixctl_fdb_add().

2024-05-28 Thread Mike Pattrick
On Tue, May 28, 2024 at 7:46 AM Eelco Chaudron  wrote:
>
> Fix the warning from Coverity about potential truncation of the
> time_t value when copying to a local variable by changing the
> local variable's type to time_t.
>
> ccc24fc88d59 ("ofproto-dpif: APIs and CLI option to add/delete static fdb 
> entry.")
> Signed-off-by: Eelco Chaudron 
> ---

Acked-by: Mike Pattrick 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] netdev-dpdk: Use LSC interrupt mode.

2024-06-13 Thread Mike Pattrick
On Thu, Jun 13, 2024 at 5:59 AM David Marchand
 wrote:
>
> Querying link status may get delayed for an undeterministic (long) time
> with mlx5 ports. This is a consequence of the mlx5 driver calling ethtool
> kernel API and getting stuck on the kernel RTNL lock while some other
> operation is in progress under this lock.
>
> One impact for long link status query is that it is called under the bond
> lock taken in write mode periodically in bond_run().
> In parallel, datapath threads may block requesting to read bonding related
> info (like for example in bond_check_admissibility()).
>
> The LSC interrupt mode is available with many DPDK drivers and is used by
> default with testpmd.
>
> It seems safe enough to switch on this feature by default in OVS.
> We keep the per interface option to disable this feature in case of an
> unforeseen bug.
>
> Signed-off-by: David Marchand 
> ---
>  Documentation/topics/dpdk/phy.rst | 4 ++--
>  NEWS  | 3 +++
>  lib/netdev-dpdk.c | 7 ++-
>  vswitchd/vswitch.xml  | 8 
>  4 files changed, 15 insertions(+), 7 deletions(-)
>
> diff --git a/Documentation/topics/dpdk/phy.rst 
> b/Documentation/topics/dpdk/phy.rst
> index efd168cba8..eefc25613d 100644
> --- a/Documentation/topics/dpdk/phy.rst
> +++ b/Documentation/topics/dpdk/phy.rst
> @@ -546,8 +546,8 @@ the firmware every time to fulfil this request.
>
>  Note that not all PMD drivers support LSC interrupts.
>
> -The default configuration is polling mode. To set interrupt mode, option
> -``dpdk-lsc-interrupt`` has to be set to ``true``.
> +The default configuration is interrupt mode. To set polling mode, option
> +``dpdk-lsc-interrupt`` has to be set to ``false``.
>
>  Command to set interrupt mode for a specific interface::
>  $ ovs-vsctl set interface  options:dpdk-lsc-interrupt=true
> diff --git a/NEWS b/NEWS
> index 5ae0108d55..1e19beb793 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -9,6 +9,9 @@ Post-v3.3.0
>   https://github.com/openvswitch/ovs.git
> - DPDK:
>   * OVS validated with DPDK 23.11.1.
> + * Link status changes are now handled via interrupt mode if the DPDK
> +   driver supports it.  It is possible to revert to polling mode by 
> setting
> +   per interface 'dpdk-lsc-interrupt' other_config to 'false'.
>
>
>  v3.3.0 - 16 Feb 2024
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 0fa37d5145..833d852319 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -2397,7 +2397,12 @@ netdev_dpdk_set_config(struct netdev *netdev, const 
> struct smap *args,
>  }
>  }
>
> -lsc_interrupt_mode = smap_get_bool(args, "dpdk-lsc-interrupt", false);
> +lsc_interrupt_mode = smap_get_bool(args, "dpdk-lsc-interrupt", true);
> +if (lsc_interrupt_mode && !(*info.dev_flags & RTE_ETH_DEV_INTR_LSC)) {
> +VLOG_INFO("interface '%s': link status interrupt is not supported.",
> +  netdev_get_name(netdev));
> +lsc_interrupt_mode = false;
> +}

I did a quick grep of the dpdk source tree and noticed the following
drivers had this define:

 bnxt dpaa dpaa2 ena failsafe hns3 mlx4 mlx5 netvsc tap vhost virtio

This may have been an invalid methodology, but I'm noticing some
drivers for very common network cards are missing. Is there a risk
here of starting to print these info messages all the time for a large
number of users who never set this feature in the first place?

It may be preferable to continue  defaulting to true, but only print
the error if the flag was explicitly set.

Cheers,
M

>  if (dev->requested_lsc_interrupt_mode != lsc_interrupt_mode) {
>  dev->requested_lsc_interrupt_mode = lsc_interrupt_mode;
>  netdev_request_reconfigure(netdev);
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index 8a1b607d71..e3afb78a4e 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -4647,12 +4647,12 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 
> type=patch options:peer=p1 \
>type='{"type": "boolean"}'>
>  
> -  Set this value to true to configure interrupt mode for
> -  Link State Change (LSC) detection instead of poll mode for the DPDK
> -  interface.
> +  Set this value to false to configure poll mode for
> +  Link State Change (LSC) detection instead of interrupt mode for the
> +  DPDK interface.
>  
>  
> -  If this value is not set, poll mode is configured.
> +  If this value is not set, interrupt mode is configured.
>  
>  
>This parameter has an effect only on netdev dpdk interfaces.
> --
> 2.44.0
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>

___
dev mailing list
d...@openvswitch.org

Re: [ovs-dev] [PATCH] odp-execute: Check IPv4 checksum offload flag in AVX.

2024-06-13 Thread Mike Pattrick
On Wed, Jun 12, 2024 at 6:45 AM Emma Finn  wrote:
>
> The AVX implementation for IPv4 action did not check whether
> the IPv4 checksum offload flag has been set and was incorrectly
> calculating checksums in software. Adding a check to skip AVX
> checksum claculation when offload flags are set.

Nit: calculation

>
> Signed-off-by: Emma Finn 
> Reported-by: Eelco Chaudron 
> ---
>  lib/odp-execute-avx512.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
> index 569ea789e..3ddbfcd15 100644
> --- a/lib/odp-execute-avx512.c
> +++ b/lib/odp-execute-avx512.c
> @@ -473,7 +473,8 @@ action_avx512_ipv4_set_addrs(struct dp_packet_batch 
> *batch,
>   * (v_pkt_masked). */
>  __m256i v_new_hdr = _mm256_or_si256(v_key_shuf, v_pkt_masked);
>
> -if (dp_packet_hwol_tx_ip_csum(packet)) {
> +if (dp_packet_hwol_tx_ip_csum(packet) ||
> +dp_packet_hwol_l3_ipv4(packet)) {

dp_packet_hwol_tx_ip_csum() should be a subset of
dp_packet_hwol_l3_ipv4(). If it's not set then the good flag may not
be checked.


Cheers,
M

>  dp_packet_ol_reset_ip_csum_good(packet);
>  } else {
>  ovs_be16 old_csum = ~nh->ip_csum;
> --
> 2.34.1
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] odp-execute: Set IPv6 traffic class in AVX implementation.

2024-06-13 Thread Mike Pattrick
On Wed, Jun 12, 2024 at 6:44 AM Emma Finn  wrote:
>
> The AVX implementation for the IPv6 action did not set
> traffic class field. Adding support for this field to
> the AVX implementation.
>
> Signed-off-by: Emma Finn 
> Reported-by: Eelco Chaudron 
> ---
>  lib/odp-execute-avx512.c | 8 
>  lib/packets.c| 2 +-
>  lib/packets.h| 1 +
>  3 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
> index a74a85dc1..569ea789e 100644
> --- a/lib/odp-execute-avx512.c
> +++ b/lib/odp-execute-avx512.c
> @@ -741,6 +741,14 @@ action_avx512_set_ipv6(struct dp_packet_batch *batch, 
> const struct nlattr *a)
>  }
>  /* Write back the modified IPv6 addresses. */
>  _mm512_mask_storeu_epi64((void *) nh, 0x1F, v_new_hdr);
> +
> +/* Scalar method for setting IPv6 tclass field. */
> +if (key->ipv6_tclass) {
> +uint8_t old_tc = ntohl(get_16aligned_be32(>ip6_flow)) >> 20;
> +uint8_t key_tc = (key->ipv6_tclass |
> + (old_tc & ~mask->ipv6_tclass));
> +packet_set_ipv6_tc(>ip6_flow, key_tc);
> +}

Hello,

I'm wondering if we also need to set the flow label?

Thanks,
M

>  }
>  }
>  #endif /* HAVE_AVX512VBMI */
> diff --git a/lib/packets.c b/lib/packets.c
> index ebf516d67..91c28daf0 100644
> --- a/lib/packets.c
> +++ b/lib/packets.c
> @@ -1299,7 +1299,7 @@ packet_set_ipv6_flow_label(ovs_16aligned_be32 
> *flow_label, ovs_be32 flow_key)
>  put_16aligned_be32(flow_label, new_label);
>  }
>
> -static void
> +void
>  packet_set_ipv6_tc(ovs_16aligned_be32 *flow_label, uint8_t tc)
>  {
>  ovs_be32 old_label = get_16aligned_be32(flow_label);
> diff --git a/lib/packets.h b/lib/packets.h
> index 8b6994809..a102f8163 100644
> --- a/lib/packets.h
> +++ b/lib/packets.h
> @@ -1635,6 +1635,7 @@ void packet_set_ipv6_addr(struct dp_packet *packet, 
> uint8_t proto,
>bool recalculate_csum);
>  void packet_set_ipv6_flow_label(ovs_16aligned_be32 *flow_label,
>  ovs_be32 flow_key);
> +void packet_set_ipv6_tc(ovs_16aligned_be32 *flow_label, uint8_t tc);
>  void packet_set_tcp_port(struct dp_packet *, ovs_be16 src, ovs_be16 dst);
>  void packet_set_udp_port(struct dp_packet *, ovs_be16 src, ovs_be16 dst);
>  void packet_set_sctp_port(struct dp_packet *, ovs_be16 src, ovs_be16 dst);
> --
> 2.34.1
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ofproto: Add upcall/dump-ufid-rules command to map UFIDs to OpenFlow.

2024-06-13 Thread Mike Pattrick
On Tue, Jun 4, 2024 at 7:29 AM Dumitru Ceara  wrote:
>
> It improves the debugging experience if we can easily get a list of
> OpenFlow rules and groups that contribute to the creation of a datapath
> flow.
>
> The suggested workflow is:
> a. dump datapath flows (along with UUIDs), this also prints the core IDs
> (PMD IDs) when applicable.
>
>   $ ovs-appctl dpctl/dump-flows -m
>   flow-dump from pmd on cpu core: 7
>   ufid:7460db8f..., recirc_id(0), 
>
> b. dump related OpenFlow rules and groups:
>   $ ovs-appctl upcall/dump-ufid-rules ufid:7460db8f... pmd=7
>   cookie=0x12345678, table=0 
> priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
>   cookie=0x0, table=1 priority=200,actions=group:1
>   group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
>   cookie=0x0, table=2 actions=output:1
>
> The new command only shows rules and groups attached to ukeys that are
> in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
> other ukeys should not be relevant for the use case presented above.
>
> For ukeys that don't have an xcache populated yet, the command goes
> ahead and populates one.  In theory this is creates a slight overhead as
> those ukeys might not need an xcache until they see traffic (and get
> revalidated) but in practice the overhead should be minimal.
>
> This commit tries to mimic the output format of the ovs-ofctl
> dump-flows/dump-groups commands.  For groups it actually uses
> ofputil_group/_bucket functions for formatting.  For rules it uses
> flow_stats_ds() (the original function was exported and renamed to
> ofproto_rule_stats_ds()).
>
> Signed-off-by: Dumitru Ceara 
> ---
> V2:
> - Addressed Adrian's comments:
>   - check return value of populate_xcache()
>   - use flow_stats_ds() (renamed to ofproto_rule_stats_ds()) instead of
> custom printing
>   - move ukey->state check to caller
>   - handle case when group bucket is not available
>   - update test case to cover the point above
> ---
>  NEWS|   3 +
>  include/openvswitch/ofp-group.h |   7 ++
>  lib/ofp-group.c | 110 +++-
>  ofproto/ofproto-dpif-upcall.c   |  85 
>  ofproto/ofproto-dpif.c  |  24 +++
>  ofproto/ofproto-dpif.h  |   2 +
>  ofproto/ofproto-provider.h  |   1 +
>  ofproto/ofproto.c   |  85 
>  tests/ofproto-dpif.at   |  47 ++
>  tests/ofproto-macros.at |   4 ++
>  10 files changed, 282 insertions(+), 86 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 5ae0108d552b..1bc085f97045 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -9,6 +9,9 @@ Post-v3.3.0
>   https://github.com/openvswitch/ovs.git
> - DPDK:
>   * OVS validated with DPDK 23.11.1.
> +   - ovs-appctl:
> + * Added 'upcall/dump-ufid-rules' to output the set of OpenFlow rules and
> +   groups that contributed to the creation of a specific datapath flow.
>
>
>  v3.3.0 - 16 Feb 2024
> diff --git a/include/openvswitch/ofp-group.h b/include/openvswitch/ofp-group.h
> index cd7af0ebff9c..79fcb3a4c0d1 100644
> --- a/include/openvswitch/ofp-group.h
> +++ b/include/openvswitch/ofp-group.h
> @@ -70,6 +70,11 @@ struct ofputil_bucket *ofputil_bucket_find(const struct 
> ovs_list *,
>  bool ofputil_bucket_check_duplicate_id(const struct ovs_list *);
>  struct ofputil_bucket *ofputil_bucket_list_front(const struct ovs_list *);
>  struct ofputil_bucket *ofputil_bucket_list_back(const struct ovs_list *);
> +void ofputil_bucket_format(const struct ofputil_bucket *,
> +   enum ofp11_group_type, enum ofp_version,
> +   const struct ofputil_port_map *,
> +   const struct ofputil_table_map *,
> +   struct ds *);
>
>  static inline bool
>  ofputil_bucket_has_liveness(const struct ofputil_bucket *bucket)
> @@ -88,6 +93,8 @@ struct ofputil_group_props {
>  void ofputil_group_properties_destroy(struct ofputil_group_props *);
>  void ofputil_group_properties_copy(struct ofputil_group_props *to,
> const struct ofputil_group_props *from);
> +void ofputil_group_properties_format(const struct ofputil_group_props *,
> + struct ds *);
>  /* Protocol-independent group_mod. */
>  struct ofputil_group_mod {
>  uint16_t command; /* One of OFPGC15_*. */
> diff --git a/lib/ofp-group.c b/lib/ofp-group.c
> index 737f48047b10..28504c068c60 100644
> --- a/lib/ofp-group.c
> +++ b/lib/ofp-group.c
> @@ -58,14 +58,16 @@ ofputil_group_from_string(const char *s, uint32_t 
> *group_idp)
>  return true;
>  }
>
> -/* Appends to 's' a string representation of the OpenFlow group ID 
> 'group_id'.
> - * Most groups' string representation is just the number, but for special
> - * groups, e.g. OFPG_ALL, it is the name, e.g. "ALL". */
> +/* Appends to 's' a string representation of the 

Re: [ovs-dev] [PATCH v2] netdev-dpdk: Use LSC interrupt mode.

2024-06-14 Thread Mike Pattrick
On Fri, Jun 14, 2024 at 2:48 AM David Marchand
 wrote:
>
> Querying link status may get delayed for an undeterministic (long) time
> with mlx5 ports. This is a consequence of the mlx5 driver calling ethtool
> kernel API and getting stuck on the kernel RTNL lock while some other
> operation is in progress under this lock.
>
> One impact for long link status query is that it is called under the bond
> lock taken in write mode periodically in bond_run().
> In parallel, datapath threads may block requesting to read bonding related
> info (like for example in bond_check_admissibility()).
>
> The LSC interrupt mode is available with many DPDK drivers and is used by
> default with testpmd.
>
> It seems safe enough to switch on this feature by default in OVS.
> We keep the per interface option to disable this feature in case of an
> unforeseen bug.
>
> Signed-off-by: David Marchand 
> ---
> Changes since v1:
> - (early) fail when interrupt lsc is requested by user but not supported
>   by the driver,
> - otherwise, log a debug message if user did not request interrupt mode,
>
> ---
>  Documentation/topics/dpdk/phy.rst |  4 ++--
>  NEWS  |  3 +++
>  lib/netdev-dpdk.c | 13 -
>  vswitchd/vswitch.xml  |  8 
>  4 files changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/Documentation/topics/dpdk/phy.rst 
> b/Documentation/topics/dpdk/phy.rst
> index efd168cba8..eefc25613d 100644
> --- a/Documentation/topics/dpdk/phy.rst
> +++ b/Documentation/topics/dpdk/phy.rst
> @@ -546,8 +546,8 @@ the firmware every time to fulfil this request.
>
>  Note that not all PMD drivers support LSC interrupts.
>
> -The default configuration is polling mode. To set interrupt mode, option
> -``dpdk-lsc-interrupt`` has to be set to ``true``.
> +The default configuration is interrupt mode. To set polling mode, option
> +``dpdk-lsc-interrupt`` has to be set to ``false``.
>
>  Command to set interrupt mode for a specific interface::
>  $ ovs-vsctl set interface  options:dpdk-lsc-interrupt=true
> diff --git a/NEWS b/NEWS
> index 5ae0108d55..1e19beb793 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -9,6 +9,9 @@ Post-v3.3.0
>   https://github.com/openvswitch/ovs.git
> - DPDK:
>   * OVS validated with DPDK 23.11.1.
> + * Link status changes are now handled via interrupt mode if the DPDK
> +   driver supports it.  It is possible to revert to polling mode by 
> setting
> +   per interface 'dpdk-lsc-interrupt' other_config to 'false'.

As Robin points out, other_config should be changed to options.But the
rest looks good.

Acked-by: Mike Pattrick 

>
>
>  v3.3.0 - 16 Feb 2024
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 0fa37d5145..a260bc8485 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -2397,7 +2397,18 @@ netdev_dpdk_set_config(struct netdev *netdev, const 
> struct smap *args,
>  }
>  }
>
> -lsc_interrupt_mode = smap_get_bool(args, "dpdk-lsc-interrupt", false);
> +lsc_interrupt_mode = smap_get_bool(args, "dpdk-lsc-interrupt", true);
> +if (lsc_interrupt_mode && !(*info.dev_flags & RTE_ETH_DEV_INTR_LSC)) {
> +if (smap_get(args, "dpdk-lsc-interrupt")) {
> +VLOG_ERR("interface '%s': link status interrupt is not 
> supported.",
> + netdev_get_name(netdev));
> +err = EINVAL;
> +goto out;
> +}
> +VLOG_DBG("interface '%s': not enabling link status interrupt.",
> + netdev_get_name(netdev));
> +lsc_interrupt_mode = false;
> +}
>  if (dev->requested_lsc_interrupt_mode != lsc_interrupt_mode) {
>  dev->requested_lsc_interrupt_mode = lsc_interrupt_mode;
>  netdev_request_reconfigure(netdev);
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index 8a1b607d71..e3afb78a4e 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -4647,12 +4647,12 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 
> type=patch options:peer=p1 \
>type='{"type": "boolean"}'>
>  
> -  Set this value to true to configure interrupt mode for
> -  Link State Change (LSC) detection instead of poll mode for the DPDK
> -  interface.
> +  Set this value to false to configure poll mode for
> +  Link State Change (LSC) detection instead of interrupt mode for the
> +  DPDK interface.
>  
>  
> -  If this value is not set, poll mode is configured.
> +  If this value is not set, interrupt mode is configured.
>  
>  
>This parameter has an effect only on netdev dpdk interfaces.
> --
> 2.44.0
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ovsdb: Use table indexes if available for ovsdb_query().

2024-06-17 Thread Mike Pattrick
On Mon, Jun 17, 2024 at 12:54 PM Mike Pattrick  wrote:
>
> Currently all OVSDB database queries except for UUID lookups all result
> in linear lookups over the entire table, even if an index is present.
>
> This patch modifies ovsdb_query() to attempt an index lookup first, if
> possible. If no matching indexes are present then a linear index is
> still conducted.
>
> To test this, I set up an ovsdb database with a variable number of rows
> and timed the average of how long ovsdb-client took to query a single
> row. The first two tests involved a linear scan that didn't match any
> rows, so there was no overhead associated with sending or encoding
> output. The post-patch linear scan was a worst case scenario where the
> table did have an appropriate index but the conditions made its usage
> impossible. The indexed lookup test was for a matching row, which did
> also include overhead associated with a match. The results are included
> in the table below.
>
> Rows   | 100k | 200k | 300k | 400k | 500k
> ---+--+--+--+--+-
> Pre-patch linear scan  |  9ms | 24ms | 37ms | 49ms | 61ms
> Post-patch linear scan |  9ms | 24ms | 38ms | 49ms | 61ms
> Indexed lookup |  3ms |  3ms |  3ms |  3ms |  3ms
>
> I also tested the performance of ovsdb_query() by wrapping it in a loop
> and measuring the time it took to perform 1000 linear scans on 1, 10,
> 100k, and 200k rows. This test showed that the new index checking code
> did not slow down worst case lookups to a statistically detectable
> degree.
>
> Reported-at: https://issues.redhat.com/browse/FDP-590
> Signed-off-by: Mike Pattrick 
>
> ---
>
> v2:
>  - Included txn in index code
>  - Added benchmarks
>  - Refactored code
>  - Added more tests
>  - Now a mock row is created to perform the search with standard
>  functions
> Signed-off-by: Mike Pattrick 

Recheck-request: github-robot

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ovsdb: Use table indexes if available for ovsdb_query().

2024-06-17 Thread Mike Pattrick
On Mon, Jun 3, 2024 at 2:01 PM Ilya Maximets  wrote:
>
> On 6/3/24 06:20, Mike Pattrick wrote:
> > Currently all OVSDB database queries except for UUID lookups all result
> > in linear lookups over the entire table, even if an index is present.
> >
> > This patch modifies ovsdb_query() to attempt an index lookup first, if
> > possible. If no matching indexes are present then a linear index is
> > still conducted.
> >
> > Reported-at: https://issues.redhat.com/browse/FDP-590
> > Signed-off-by: Mike Pattrick 
> > ---
> >  NEWS |   3 ++
> >  ovsdb/query.c| 102 +++
> >  ovsdb/row.h  |  28 +++
> >  ovsdb/transaction.c  |  27 ---
> >  tests/ovsdb-execution.at |  34 -
> >  tests/ovsdb-server.at|   2 +-
> >  tests/ovsdb-tool.at  |   2 +-
> >  7 files changed, 159 insertions(+), 39 deletions(-)
>
> Hi, Mike.  Thanks for the patch.
>
> Besides what Simon asked, the patch has a few other issues:
>
> 1. Lookup is performed only on the committed index and it doesn't include
>rows that are in-flight in the current transaction.
>
>Unlike rows in a hash table, indexes are updated only after the whole
>transaction is committed.  With this change we'll not be able to find
>newly added rows.
>
>Another thing related to this is that it is allowed to have duplicates
>within a transaction as long as they are removed before the transaction
>ends.  So it is possible that multiple rows will satisfy the condition
>on indexed columns while the transaction is in-flight.
>
>Consider the following commands executed in a sandbox:
>
># ovs-vsctl set-manager "tcp:my-first-target"
># ovsdb-client transact unix:$(pwd)/sandbox/db.sock '
>["Open_vSwitch",
> {"op": "select",
>  "table": "Manager",
>  "columns": ["_uuid", "target"],
>  "where": [["target", "==", "tcp:my-first-target"]]},
> {"op": "insert",
>  "table": "Manager",
>  "uuid-name": "duplicate",
>  "row": {"target": "tcp:my-first-target"}},
> {"op": "select",
>  "table": "Manager",
>  "columns": ["_uuid", "target"],
>  "where": [["target", "==", "tcp:my-first-target"]]},
> {"op": "delete",
>  "table": "Manager",
>  "where":[["_uuid","==",["named-uuid","duplicate"]]]},
> {"op": "select",
>  "table": "Manager",
>  "columns": ["_uuid", "target"],
>  "where": [["target", "==", "tcp:my-first-target"]]}]'
>
>Transaction must succeed.  The first selection should return 1 row,
>the second should return both duplicates and the third should again
>return one row.

This is a good point, I hadn't anticipated this use-case but it does
have a large impact on this change. After working through a few
implementations, I wasn't able to find a solution that wasn't overly
complex. For the next version, I've instead opted to exclude indexed
lookups from transactions that modify the associated row.

The next version should address this and the other feedback.

Cheers,
M

>
>Ideally, implementation should not leak the transaction details to
>the query module, though I'm not sure if that is 100% achievable.
>
> 2. Taking above case into account, this change needs way more unit tests
>with different order of operations and complex data updates.
>
> 3. Since this is a performance-oriented change, please, include some
>performance numbers in the commit message as well, including impact
>on non-indexed lookups, if any.
>
> 4. There seems to be a lot of logic overlap with existing functions like
>ovsdb_condition_match_every_clause(), ovsdb_index_search() and
>ovsdb_row_hash_columns().  Can we re-use those instead?  For example,
>by creating a row from the conditions before the lookup?  What a
>performance impact will look like?
>
> Best regards, Ilya Maximets.
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] ovsdb: Use table indexes if available for ovsdb_query().

2024-06-17 Thread Mike Pattrick
Currently all OVSDB database queries except for UUID lookups all result
in linear lookups over the entire table, even if an index is present.

This patch modifies ovsdb_query() to attempt an index lookup first, if
possible. If no matching indexes are present then a linear index is
still conducted.

To test this, I set up an ovsdb database with a variable number of rows
and timed the average of how long ovsdb-client took to query a single
row. The first two tests involved a linear scan that didn't match any
rows, so there was no overhead associated with sending or encoding
output. The post-patch linear scan was a worst case scenario where the
table did have an appropriate index but the conditions made its usage
impossible. The indexed lookup test was for a matching row, which did
also include overhead associated with a match. The results are included
in the table below.

Rows   | 100k | 200k | 300k | 400k | 500k
---+--+--+--+--+-
Pre-patch linear scan  |  9ms | 24ms | 37ms | 49ms | 61ms
Post-patch linear scan |  9ms | 24ms | 38ms | 49ms | 61ms
Indexed lookup |  3ms |  3ms |  3ms |  3ms |  3ms

I also tested the performance of ovsdb_query() by wrapping it in a loop
and measuring the time it took to perform 1000 linear scans on 1, 10,
100k, and 200k rows. This test showed that the new index checking code
did not slow down worst case lookups to a statistically detectable
degree.

Reported-at: https://issues.redhat.com/browse/FDP-590
Signed-off-by: Mike Pattrick 

---

v2:
 - Included txn in index code
 - Added benchmarks
 - Refactored code
 - Added more tests
 - Now a mock row is created to perform the search with standard
 functions
Signed-off-by: Mike Pattrick 
---
 ovsdb/execution.c|  20 +++--
 ovsdb/query.c| 174 +++
 ovsdb/query.h|   6 +-
 ovsdb/rbac.c |  15 ++--
 ovsdb/rbac.h |  10 ++-
 ovsdb/row.h  |  28 +++
 ovsdb/transaction.c  |  29 +--
 ovsdb/transaction.h  |   5 ++
 tests/ovsdb-execution.at | 108 +++-
 tests/ovsdb-macros.at|  10 +++
 tests/ovsdb-query.at |  18 ++--
 tests/ovsdb-server.at|   2 +-
 tests/ovsdb-tool.at  |   2 +-
 tests/test-ovsdb.c   |  15 +++-
 14 files changed, 363 insertions(+), 79 deletions(-)

diff --git a/ovsdb/execution.c b/ovsdb/execution.c
index f4cc9e802..212839bca 100644
--- a/ovsdb/execution.c
+++ b/ovsdb/execution.c
@@ -459,7 +459,7 @@ ovsdb_execute_select(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 if (!error) {
 struct ovsdb_row_set rows = OVSDB_ROW_SET_INITIALIZER;
 
-ovsdb_query_distinct(table, , , );
+ovsdb_query_distinct(table, , , , x->txn);
 ovsdb_row_set_sort(, );
 json_object_put(result, "rows",
 ovsdb_row_set_to_json(, ));
@@ -545,8 +545,8 @@ ovsdb_execute_update(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 ur.row = row;
 ur.columns = 
 if (ovsdb_rbac_update(x->db, table, , , x->role,
-  x->id)) {
-ovsdb_query(table, , update_row_cb, );
+  x->id, x->txn)) {
+ovsdb_query(table, , update_row_cb, , x->txn);
 } else {
 error = ovsdb_perm_error("RBAC rules for client \"%s\" role "
  "\"%s\" prohibit modification of "
@@ -626,7 +626,7 @@ ovsdb_execute_mutate(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 json_integer_create(hmap_count(>rows)));
 } else {
 size_t row_count = 0;
-ovsdb_query(table, , count_row_cb, _count);
+ovsdb_query(table, , count_row_cb, _count, x->txn);
 json_object_put(result, "count",
 json_integer_create(row_count));
 }
@@ -636,8 +636,8 @@ ovsdb_execute_mutate(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 mr.mutations = 
 mr.error = 
 if (ovsdb_rbac_mutate(x->db, table, , , x->role,
-  x->id)) {
-ovsdb_query(table, , mutate_row_cb, );
+  x->id, x->txn)) {
+ovsdb_query(table, , mutate_row_cb, , x->txn);
 } else {
 error = ovsdb_perm_error("RBAC rules for client \"%s\" role "
  "\"%s\" prohibit mutate operation on "
@@ -693,8 +693,9 @@ ovsdb_execute_delete(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 dr.table = table;
 dr.txn = x->txn;
 
-if (ovsdb_rbac_delete(x->db, table, , x->role, x->id)) {
-ovsdb_query(table, , delete_row_cb, );
+if (ovsdb_rbac_delete(x->db, tab

Re: [ovs-dev] [PATCH v3] ofproto: Add upcall/dump-ufid-rules command to map UFIDs to OpenFlow.

2024-06-18 Thread Mike Pattrick
On Fri, Jun 14, 2024 at 6:45 AM Dumitru Ceara  wrote:
>
> It improves the debugging experience if we can easily get a list of
> OpenFlow rules and groups that contribute to the creation of a datapath
> flow.
>
> The suggested workflow is:
> a. dump datapath flows (along with UUIDs), this also prints the core IDs
> (PMD IDs) when applicable.
>
>   $ ovs-appctl dpctl/dump-flows -m
>   flow-dump from pmd on cpu core: 7
>   ufid:7460db8f..., recirc_id(0), 
>
> b. dump related OpenFlow rules and groups:
>   $ ovs-appctl upcall/dump-ufid-rules ufid:7460db8f... pmd=7
>   cookie=0x12345678, table=0 
> priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
>   cookie=0x0, table=1 priority=200,actions=group:1
>   group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
>   cookie=0x0, table=2 actions=output:1
>
> The new command only shows rules and groups attached to ukeys that are
> in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
> other ukeys should not be relevant for the use case presented above.
>
> For ukeys that don't have an xcache populated yet, the command goes
> ahead and populates one.  In theory this is creates a slight overhead as
> those ukeys might not need an xcache until they see traffic (and get
> revalidated) but in practice the overhead should be minimal.
>
> This commit tries to mimic the output format of the ovs-ofctl
> dump-flows/dump-groups commands.  For groups it actually uses
> ofputil_group/_bucket functions for formatting.  For rules it uses
> flow_stats_ds() (the original function was exported and renamed to
> ofproto_rule_stats_ds()).
>
> Signed-off-by: Dumitru Ceara 
> ---
> V3:
> - Addressed Mike's comments:
>   - use ds_put_char()
>   - make the test less flaky
> V2:
> - Addressed Adrian's comments:
>   - check return value of populate_xcache()
>   - use flow_stats_ds() (renamed to ofproto_rule_stats_ds()) instead of
> custom printing
>   - move ukey->state check to caller
>   - handle case when group bucket is not available
>   - update test case to cover the point above
> ---
>  NEWS|   3 +
>  include/openvswitch/ofp-group.h |   7 ++
>  lib/ofp-group.c | 110 +++-
>  ofproto/ofproto-dpif-upcall.c   |  85 
>  ofproto/ofproto-dpif.c  |  24 +++
>  ofproto/ofproto-dpif.h  |   2 +
>  ofproto/ofproto-provider.h  |   1 +
>  ofproto/ofproto.c   |  85 
>  tests/ofproto-dpif.at   |  47 ++
>  tests/ofproto-macros.at |   9 ++-
>  10 files changed, 285 insertions(+), 88 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 5ae0108d552b..1bc085f97045 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -9,6 +9,9 @@ Post-v3.3.0
>   https://github.com/openvswitch/ovs.git
> - DPDK:
>   * OVS validated with DPDK 23.11.1.
> +   - ovs-appctl:
> + * Added 'upcall/dump-ufid-rules' to output the set of OpenFlow rules and
> +   groups that contributed to the creation of a specific datapath flow.
>
>
>  v3.3.0 - 16 Feb 2024
> diff --git a/include/openvswitch/ofp-group.h b/include/openvswitch/ofp-group.h
> index cd7af0ebff9c..79fcb3a4c0d1 100644
> --- a/include/openvswitch/ofp-group.h
> +++ b/include/openvswitch/ofp-group.h
> @@ -70,6 +70,11 @@ struct ofputil_bucket *ofputil_bucket_find(const struct 
> ovs_list *,
>  bool ofputil_bucket_check_duplicate_id(const struct ovs_list *);
>  struct ofputil_bucket *ofputil_bucket_list_front(const struct ovs_list *);
>  struct ofputil_bucket *ofputil_bucket_list_back(const struct ovs_list *);
> +void ofputil_bucket_format(const struct ofputil_bucket *,
> +   enum ofp11_group_type, enum ofp_version,
> +   const struct ofputil_port_map *,
> +   const struct ofputil_table_map *,
> +   struct ds *);
>
>  static inline bool
>  ofputil_bucket_has_liveness(const struct ofputil_bucket *bucket)
> @@ -88,6 +93,8 @@ struct ofputil_group_props {
>  void ofputil_group_properties_destroy(struct ofputil_group_props *);
>  void ofputil_group_properties_copy(struct ofputil_group_props *to,
> const struct ofputil_group_props *from);
> +void ofputil_group_properties_format(const struct ofputil_group_props *,
> + struct ds *);
>  /* Protocol-independent group_mod. */
>  struct ofputil_group_mod {
>  uint16_t command; /* One of OFPGC15_*. */
> diff --git a/lib/ofp-group.c b/lib/ofp-group.c
> index 737f48047b10..0be453d15203 100644
> --- a/lib/ofp-group.c
> +++ b/lib/ofp-group.c
> @@ -58,14 +58,16 @@ ofputil_group_from_string(const char *s, uint32_t 
> *group_idp)
>  return true;
>  }
>
> -/* Appends to 's' a string representation of the OpenFlow group ID 
> 'group_id'.
> - * Most groups' string representation is just the number, but for special
> - * groups, 

[ovs-dev] [PATCH] ovsdb: Use table indexes if available for ovsdb_query().

2024-06-02 Thread Mike Pattrick
Currently all OVSDB database queries except for UUID lookups all result
in linear lookups over the entire table, even if an index is present.

This patch modifies ovsdb_query() to attempt an index lookup first, if
possible. If no matching indexes are present then a linear index is
still conducted.

Reported-at: https://issues.redhat.com/browse/FDP-590
Signed-off-by: Mike Pattrick 
---
 NEWS |   3 ++
 ovsdb/query.c| 102 +++
 ovsdb/row.h  |  28 +++
 ovsdb/transaction.c  |  27 ---
 tests/ovsdb-execution.at |  34 -
 tests/ovsdb-server.at|   2 +-
 tests/ovsdb-tool.at  |   2 +-
 7 files changed, 159 insertions(+), 39 deletions(-)

diff --git a/NEWS b/NEWS
index b92cec532..89f64d6f6 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,9 @@ Post-v3.3.0
- The primary development branch has been renamed from 'master' to 'main'.
  The OVS tree remains hosted on GitHub.
  https://github.com/openvswitch/ovs.git
+   - OVSDB:
+ * Added support to lookup rows based on arbitrary indexes, instead of
+   only UUID indexes.
 
 
 v3.3.0 - 16 Feb 2024
diff --git a/ovsdb/query.c b/ovsdb/query.c
index eebe56412..e3e50a034 100644
--- a/ovsdb/query.c
+++ b/ovsdb/query.c
@@ -21,32 +21,116 @@
 #include "condition.h"
 #include "row.h"
 #include "table.h"
+#include "transaction.h"
+
+static bool
+ovsdb_query_index(struct ovsdb_table *table,
+  const struct ovsdb_condition *cnd,
+  const struct ovsdb_row **out)
+{
+for (size_t idx = 0; idx < table->schema->n_indexes; idx++) {
+const struct ovsdb_column_set *index = >schema->indexes[idx];
+struct hmap_node *node;
+size_t matches = 0;
+uint32_t hash = 0;
+
+if (index->n_columns != cnd->n_clauses) {
+continue;
+}
+
+/* The conditions may not be in the same order as the index. */
+for (size_t c = 0; c < cnd->n_clauses; c++) {
+const struct ovsdb_clause *cnd_cls = >clauses[c];
+
+if (cnd_cls->function != OVSDB_F_EQ) {
+return false;
+}
+
+for (size_t i = 0; i < index->n_columns; i++) {
+const struct ovsdb_column *idx_col = index->columns[i];
+
+if (cnd_cls->index == idx_col->index) {
+hash = ovsdb_datum_hash(_cls->arg, _col->type,
+hash);
+matches++;
+break;
+}
+}
+
+/* If none of the indexed columns match, continue to the next
+ * index. */
+if (matches == c) {
+break;
+}
+}
+
+if (matches != cnd->n_clauses) {
+continue;
+}
+
+for (node = hmap_first_with_hash(>indexes[idx], hash); node;
+ node = hmap_next_with_hash(node)) {
+struct ovsdb_row *irow = ovsdb_row_from_index_node(node, table,
+   idx);
+
+for (size_t c = 0; c < cnd->n_clauses; c++) {
+const struct ovsdb_clause *cnd_cls = >clauses[c];
+
+if (!ovsdb_datum_equals(_cls->arg,
+>fields[cnd_cls->index],
+_cls->column->type)) {
+irow = NULL;
+break;
+}
+}
+
+if (irow) {
+*out = irow;
+return true;
+}
+}
+
+/* In the case that there was a matching index but no matching row, the
+ * index check is still considered to be a success. */
+return true;
+}
+return false;
+}
 
 void
 ovsdb_query(struct ovsdb_table *table, const struct ovsdb_condition *cnd,
 bool (*output_row)(const struct ovsdb_row *, void *aux), void *aux)
 {
+const struct ovsdb_row *row = NULL;
+
 if (cnd->n_clauses > 0
 && cnd->clauses[0].column->index == OVSDB_COL_UUID
 && cnd->clauses[0].function == OVSDB_F_EQ) {
 /* Optimize the case where the query has a clause of the form "uuid ==
  * ", since we have an index on UUID. */
-const struct ovsdb_row *row;
 
 row = ovsdb_table_get_row(table, >clauses[0].arg.keys[0].uuid);
 if (row && row->table == table &&
 ovsdb_condition_match_every_clause(row, cnd)) {
 output_row(row, aux);
 }
-} else {
-/* Linear scan. */
-const struct ovsdb_row *row;
+return;
+}
 
-HMAP_FOR_EACH_SAFE (row, hmap_node, >rows) {
-if (ovsdb_condition_match_every_clause(row, cnd) &

[ovs-dev] [PATCH v2] ovsdb: Use table indexes if available for ovsdb_query().

2024-06-02 Thread Mike Pattrick
Currently all OVSDB database queries except for UUID lookups all result
in linear lookups over the entire table, even if an index is present.

This patch modifies ovsdb_query() to attempt an index lookup first, if
possible. If no matching indexes are present then a linear index is
still conducted.

Reported-at: https://issues.redhat.com/browse/FDP-590
Signed-off-by: Mike Pattrick 
---
 NEWS |   3 ++
 ovsdb/query.c| 102 +++
 ovsdb/row.h  |  28 +++
 ovsdb/transaction.c  |  27 ---
 tests/ovsdb-execution.at |  34 -
 tests/ovsdb-server.at|   2 +-
 tests/ovsdb-tool.at  |   2 +-
 7 files changed, 159 insertions(+), 39 deletions(-)

diff --git a/NEWS b/NEWS
index 5ae0108d5..616fd19a4 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,9 @@ Post-v3.3.0
  https://github.com/openvswitch/ovs.git
- DPDK:
  * OVS validated with DPDK 23.11.1.
+   - OVSDB:
+ * Added support to lookup rows based on arbitrary indexes, instead of
+   only UUID indexes.
 
 
 v3.3.0 - 16 Feb 2024
diff --git a/ovsdb/query.c b/ovsdb/query.c
index eebe56412..e3e50a034 100644
--- a/ovsdb/query.c
+++ b/ovsdb/query.c
@@ -21,32 +21,116 @@
 #include "condition.h"
 #include "row.h"
 #include "table.h"
+#include "transaction.h"
+
+static bool
+ovsdb_query_index(struct ovsdb_table *table,
+  const struct ovsdb_condition *cnd,
+  const struct ovsdb_row **out)
+{
+for (size_t idx = 0; idx < table->schema->n_indexes; idx++) {
+const struct ovsdb_column_set *index = >schema->indexes[idx];
+struct hmap_node *node;
+size_t matches = 0;
+uint32_t hash = 0;
+
+if (index->n_columns != cnd->n_clauses) {
+continue;
+}
+
+/* The conditions may not be in the same order as the index. */
+for (size_t c = 0; c < cnd->n_clauses; c++) {
+const struct ovsdb_clause *cnd_cls = >clauses[c];
+
+if (cnd_cls->function != OVSDB_F_EQ) {
+return false;
+}
+
+for (size_t i = 0; i < index->n_columns; i++) {
+const struct ovsdb_column *idx_col = index->columns[i];
+
+if (cnd_cls->index == idx_col->index) {
+hash = ovsdb_datum_hash(_cls->arg, _col->type,
+hash);
+matches++;
+break;
+}
+}
+
+/* If none of the indexed columns match, continue to the next
+ * index. */
+if (matches == c) {
+break;
+}
+}
+
+if (matches != cnd->n_clauses) {
+continue;
+}
+
+for (node = hmap_first_with_hash(>indexes[idx], hash); node;
+ node = hmap_next_with_hash(node)) {
+struct ovsdb_row *irow = ovsdb_row_from_index_node(node, table,
+   idx);
+
+for (size_t c = 0; c < cnd->n_clauses; c++) {
+const struct ovsdb_clause *cnd_cls = >clauses[c];
+
+if (!ovsdb_datum_equals(_cls->arg,
+>fields[cnd_cls->index],
+_cls->column->type)) {
+irow = NULL;
+break;
+}
+}
+
+if (irow) {
+*out = irow;
+return true;
+}
+}
+
+/* In the case that there was a matching index but no matching row, the
+ * index check is still considered to be a success. */
+return true;
+}
+return false;
+}
 
 void
 ovsdb_query(struct ovsdb_table *table, const struct ovsdb_condition *cnd,
 bool (*output_row)(const struct ovsdb_row *, void *aux), void *aux)
 {
+const struct ovsdb_row *row = NULL;
+
 if (cnd->n_clauses > 0
 && cnd->clauses[0].column->index == OVSDB_COL_UUID
 && cnd->clauses[0].function == OVSDB_F_EQ) {
 /* Optimize the case where the query has a clause of the form "uuid ==
  * ", since we have an index on UUID. */
-const struct ovsdb_row *row;
 
 row = ovsdb_table_get_row(table, >clauses[0].arg.keys[0].uuid);
 if (row && row->table == table &&
 ovsdb_condition_match_every_clause(row, cnd)) {
 output_row(row, aux);
 }
-} else {
-/* Linear scan. */
-const struct ovsdb_row *row;
+return;
+}
 
-HMAP_FOR_EACH_SAFE (row, hmap_node, >rows) {
-if (ovsdb_condition_match_every_clause(row, cnd) &&
-!output_row(row, aux)) {
-  

Re: [ovs-dev] [PATCH] ofp-prop: Fix unaligned 128 bit access.

2024-06-12 Thread Mike Pattrick
On Wed, Jun 12, 2024 at 9:50 AM Ales Musil  wrote:

>
>
> On Wed, Jun 12, 2024 at 3:32 PM Mike Pattrick  wrote:
>
>> When compiling with '-fsanitize=address,undefined', the "ovs-ofctl
>> ct-flush" test will yield the following undefined behavior flagged
>> by UBSan. This patch uses memcpy to move the 128bit value into the
>> stack before reading it.
>>
>> lib/ofp-prop.c:277:14: runtime error: load of misaligned address
>> for type 'union ovs_be128', which requires 8 byte alignment
>>   ^
>> #0 0x7735d4 in ofpprop_parse_u128 lib/ofp-prop.c:277
>> #1 0x6c6c83 in ofp_ct_match_decode lib/ofp-ct.c:529
>> #2 0x76f3b5 in ofp_print_nxt_ct_flush lib/ofp-print.c:959
>> #3 0x76f3b5 in ofp_to_string__ lib/ofp-print.c:1206
>> #4 0x76f3b5 in ofp_to_string lib/ofp-print.c:1264
>> #5 0x770c0d in ofp_print lib/ofp-print.c:1308
>> #6 0x484a9d in ofctl_ofp_print utilities/ovs-ofctl.c:4899
>> #7 0x4ddb77 in ovs_cmdl_run_command__ lib/command-line.c:247
>> #8 0x47f6b3 in main utilities/ovs-ofctl.c:186
>>
>> Signed-off-by: Mike Pattrick 
>> ---
>>
>
> Hi Mike,
>
> this is interesting, do you have an idea why it didn't fail in CI by now?
> Also AFAIR the ofprops is supposed to be aligned to 8 bytes so unless the
> buffer itself isn't allocated at an address that is not itself 8 bytes
> aligned it shouldn't happen. In that case we might actually have a problem
> with other sizes.
>

Report is seen with gcc + ubsan, but not clang + ubsan. It is possible that
this is only seen due the test, this warning wasn't seen live.

Cheers,
M


>
>
>>  lib/ofp-prop.c | 4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/ofp-prop.c b/lib/ofp-prop.c
>> index 0a685750c..ed6365414 100644
>> --- a/lib/ofp-prop.c
>> +++ b/lib/ofp-prop.c
>> @@ -271,10 +271,12 @@ enum ofperr
>>  ofpprop_parse_u128(const struct ofpbuf *property, ovs_u128 *value)
>>  {
>>  ovs_be128 *p = property->msg;
>> +ovs_be128 aligned;
>>  if (ofpbuf_msgsize(property) != sizeof *p) {
>>  return OFPERR_OFPBPC_BAD_LEN;
>>  }
>> -*value = ntoh128(*p);
>> +memcpy(, p, sizeof aligned);
>> +*value = ntoh128(aligned);
>>  return 0;
>>  }
>>
>> --
>> 2.39.3
>>
>> ___
>> dev mailing list
>> d...@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
>>
> Thanks,
> Ales
>
> --
>
> Ales Musil
>
> Senior Software Engineer - OVN Core
>
> Red Hat EMEA <https://www.redhat.com>
>
> amu...@redhat.com
> <https://red.ht/sig>
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] Userspace: Software fallback for UDP encapsulated TCP segmentation.

2024-06-12 Thread Mike Pattrick
On Tue, Jun 4, 2024 at 10:29 AM David Marchand
 wrote:
>
> On Wed, Feb 21, 2024 at 5:09 AM Mike Pattrick  wrote:
> > @@ -105,20 +115,35 @@ dp_packet_gso(struct dp_packet *p, struct 
> > dp_packet_batch **batches)
> >  return false;
> >  }
> >
> > -tcp_hdr = dp_packet_l4(p);
> > -tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
> > -tcp_seq = ntohl(get_16aligned_be32(_hdr->tcp_seq));
> > -hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p))
> > -  + tcp_offset * 4;
> > -ip_id = 0;
> > -if (dp_packet_hwol_is_ipv4(p)) {
> > +if (dp_packet_hwol_is_tunnel_vxlan(p) ||
> > +dp_packet_hwol_is_tunnel_geneve(p)) {
> > +data_pos =  dp_packet_get_inner_tcp_payload(p);
> > +outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p);
> > +tcp_hdr = dp_packet_inner_l4(p);
> > +ip_hdr = dp_packet_inner_l3(p);
> > +tnl = true;
> > +if (outer_ipv4) {
> > +ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id);
> > +} else if (dp_packet_hwol_is_ipv4(p)) {
> > +ip_id = ntohs(ip_hdr->ip_id);
> > +}
> > +} else {
> > +data_pos = dp_packet_get_tcp_payload(p);
> > +outer_ipv4 = dp_packet_hwol_is_ipv4(p);
> > +tcp_hdr = dp_packet_l4(p);
> >  ip_hdr = dp_packet_l3(p);
> > -ip_id = ntohs(ip_hdr->ip_id);
> > +tnl = false;
> > +if (outer_ipv4) {
> > +ip_id = ntohs(ip_hdr->ip_id);
> > +}
> >  }
> >
> > +tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
> > +tcp_seq = ntohl(get_16aligned_be32(_hdr->tcp_seq));
> > +hdr_len = ((char *) tcp_hdr - (char *) dp_packet_eth(p))
> > +  + tcp_offset * 4;
> >  const char *data_tail = (char *) dp_packet_tail(p)
> >  - dp_packet_l2_pad_size(p);
> > -const char *data_pos = dp_packet_get_tcp_payload(p);
> >  int n_segs = dp_packet_gso_nr_segs(p);
> >
> >  for (int i = 0; i < n_segs; i++) {
> > @@ -130,8 +155,26 @@ dp_packet_gso(struct dp_packet *p, struct 
> > dp_packet_batch **batches)
> >  seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
> >  data_pos += seg_len;
> >
> > +if (tnl) {
> > +/* Update tunnel L3 header. */
> > +if (dp_packet_hwol_is_ipv4(seg)) {
> > +ip_hdr = dp_packet_inner_l3(seg);
> > +ip_hdr->ip_tot_len = htons(sizeof *ip_hdr +
> > +   dp_packet_inner_l4_size(seg));
> > +ip_hdr->ip_id = htons(ip_id);
> > +ip_hdr->ip_csum = 0;
> > +ip_id++;
>
> Hum, it seems with this change, we are tying outer and inner (in the
> ipv4 in ipv4 case) ip id together.
> I am unclear what the Linux kernel does or what is acceptable, but I
> prefer to mention.

The Linux kernel does +1 as well. The sending OS should be
sufficiently randomizing IP ID's so this shouldn't be an issue. If
that isn't happening then at least we aren't making things worse.

> I also noticed ip_id is incremented a second time later in this loop
> which I find suspicious.
>
> I wonder if OVS should instead increment outer and inner ip ids
> (copied from original packet data) by 'i'.
> Like ip_hdr->ip_id = htons(ntohs(ip_hdr->ip_id) + i) ?

I thought the current solution was simpler. We rely on the sending
host to pick a random initial value, and just need some way to avoid
picking a value that overlaps with another fragment in flight between
the two hosts. That said, keeping them seperate shouldn't hurt
anything, I can make that change.

> And adjusting the outer udp seems missing (length and checksum if
> needed), which is probably what Ilya meant.

Yes, I  missed this.

Thanks,
M

>
>
> --
> David Marchand
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] ofp-prop: Fix unaligned 128 bit access.

2024-06-12 Thread Mike Pattrick
When compiling with '-fsanitize=address,undefined', the "ovs-ofctl
ct-flush" test will yield the following undefined behavior flagged
by UBSan. This patch uses memcpy to move the 128bit value into the
stack before reading it.

lib/ofp-prop.c:277:14: runtime error: load of misaligned address
for type 'union ovs_be128', which requires 8 byte alignment
  ^
#0 0x7735d4 in ofpprop_parse_u128 lib/ofp-prop.c:277
#1 0x6c6c83 in ofp_ct_match_decode lib/ofp-ct.c:529
#2 0x76f3b5 in ofp_print_nxt_ct_flush lib/ofp-print.c:959
#3 0x76f3b5 in ofp_to_string__ lib/ofp-print.c:1206
#4 0x76f3b5 in ofp_to_string lib/ofp-print.c:1264
#5 0x770c0d in ofp_print lib/ofp-print.c:1308
#6 0x484a9d in ofctl_ofp_print utilities/ovs-ofctl.c:4899
#7 0x4ddb77 in ovs_cmdl_run_command__ lib/command-line.c:247
#8 0x47f6b3 in main utilities/ovs-ofctl.c:186

Signed-off-by: Mike Pattrick 
---
 lib/ofp-prop.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/ofp-prop.c b/lib/ofp-prop.c
index 0a685750c..ed6365414 100644
--- a/lib/ofp-prop.c
+++ b/lib/ofp-prop.c
@@ -271,10 +271,12 @@ enum ofperr
 ofpprop_parse_u128(const struct ofpbuf *property, ovs_u128 *value)
 {
 ovs_be128 *p = property->msg;
+ovs_be128 aligned;
 if (ofpbuf_msgsize(property) != sizeof *p) {
 return OFPERR_OFPBPC_BAD_LEN;
 }
-*value = ntoh128(*p);
+memcpy(, p, sizeof aligned);
+*value = ntoh128(aligned);
 return 0;
 }
 
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] Userspace: Software fallback for UDP encapsulated TCP segmentation.

2024-06-12 Thread Mike Pattrick
When sending packets that are flagged as requiring segmentation to an
interface that doens't support this feature, send the packet to the TSO
software fallback instead of dropping it.

Signed-off-by: Mike Pattrick 

---
v2:
 - Fixed udp tunnel length
 - Added test that UDP headers are correct
 - Split inner and outer ip_id into different counters
 - Set tunnel flags in reset_tcp_seg

Signed-off-by: Mike Pattrick 
---
 lib/dp-packet-gso.c | 89 +
 lib/dp-packet.h | 34 
 lib/netdev-native-tnl.c |  8 
 lib/netdev.c| 37 +++--
 tests/system-traffic.at | 87 
 5 files changed, 216 insertions(+), 39 deletions(-)

diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c
index 847685ad9..dc43ad662 100644
--- a/lib/dp-packet-gso.c
+++ b/lib/dp-packet-gso.c
@@ -47,6 +47,8 @@ dp_packet_gso_seg_new(const struct dp_packet *p, size_t 
hdr_len,
 seg->l2_5_ofs = p->l2_5_ofs;
 seg->l3_ofs = p->l3_ofs;
 seg->l4_ofs = p->l4_ofs;
+seg->inner_l3_ofs = p->inner_l3_ofs;
+seg->inner_l4_ofs = p->inner_l4_ofs;
 
 /* The protocol headers remain the same, so preserve hash and mark. */
 *dp_packet_rss_ptr(seg) = *dp_packet_rss_ptr(p);
@@ -71,7 +73,12 @@ dp_packet_gso_nr_segs(struct dp_packet *p)
 const char *data_tail;
 const char *data_pos;
 
-data_pos = dp_packet_get_tcp_payload(p);
+if (dp_packet_hwol_is_tunnel_vxlan(p) ||
+dp_packet_hwol_is_tunnel_geneve(p)) {
+data_pos = dp_packet_get_inner_tcp_payload(p);
+} else {
+data_pos = dp_packet_get_tcp_payload(p);
+}
 data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p);
 
 return DIV_ROUND_UP(data_tail - data_pos, segsz);
@@ -89,14 +96,19 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch 
**batches)
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 struct dp_packet_batch *curr_batch = *batches;
 struct tcp_header *tcp_hdr;
+struct udp_header *tnl_hdr;
 struct ip_header *ip_hdr;
+uint16_t inner_ip_id = 0;
+uint16_t outer_ip_id = 0;
 struct dp_packet *seg;
+const char *data_pos;
 uint16_t tcp_offset;
 uint16_t tso_segsz;
 uint32_t tcp_seq;
-uint16_t ip_id;
+bool outer_ipv4;
 int hdr_len;
 int seg_len;
+bool tnl;
 
 tso_segsz = dp_packet_get_tso_segsz(p);
 if (!tso_segsz) {
@@ -105,20 +117,38 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch 
**batches)
 return false;
 }
 
-tcp_hdr = dp_packet_l4(p);
-tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
-tcp_seq = ntohl(get_16aligned_be32(_hdr->tcp_seq));
-hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p))
-  + tcp_offset * 4;
-ip_id = 0;
-if (dp_packet_hwol_is_ipv4(p)) {
+if (dp_packet_hwol_is_tunnel_vxlan(p) ||
+dp_packet_hwol_is_tunnel_geneve(p)) {
+data_pos =  dp_packet_get_inner_tcp_payload(p);
+outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p);
+tcp_hdr = dp_packet_inner_l4(p);
+ip_hdr = dp_packet_inner_l3(p);
+tnl = true;
+
+if (outer_ipv4) {
+outer_ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id);
+}
+if (dp_packet_hwol_is_ipv4(p)) {
+inner_ip_id = ntohs(ip_hdr->ip_id);
+}
+} else {
+data_pos = dp_packet_get_tcp_payload(p);
+outer_ipv4 = dp_packet_hwol_is_ipv4(p);
+tcp_hdr = dp_packet_l4(p);
 ip_hdr = dp_packet_l3(p);
-ip_id = ntohs(ip_hdr->ip_id);
+tnl = false;
+
+if (outer_ipv4) {
+outer_ip_id = ntohs(ip_hdr->ip_id);
+}
 }
 
+tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
+tcp_seq = ntohl(get_16aligned_be32(_hdr->tcp_seq));
+hdr_len = ((char *) tcp_hdr - (char *) dp_packet_eth(p))
+  + tcp_offset * 4;
 const char *data_tail = (char *) dp_packet_tail(p)
 - dp_packet_l2_pad_size(p);
-const char *data_pos = dp_packet_get_tcp_payload(p);
 int n_segs = dp_packet_gso_nr_segs(p);
 
 for (int i = 0; i < n_segs; i++) {
@@ -130,14 +160,35 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch 
**batches)
 seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
 data_pos += seg_len;
 
+if (tnl) {
+/* Update tunnel inner L3 header. */
+if (dp_packet_hwol_is_ipv4(seg)) {
+ip_hdr = dp_packet_inner_l3(seg);
+ip_hdr->ip_tot_len = htons(sizeof *ip_hdr +
+   dp_packet_inner_l4_size(seg));
+ip_hdr->ip_id = htons(inner_ip_id);
+ip_hdr->ip_csum = 0;
+inner_ip_id++;
+} else {
+struct ovs_16aligned_ip6_hdr *ip6_hdr;
+
+ 

Re: [ovs-dev] [PATCH] ofp-prop: Fix unaligned 128 bit access.

2024-06-18 Thread Mike Pattrick
On Fri, Jun 14, 2024 at 8:55 AM Ilya Maximets  wrote:
>
> On 6/12/24 20:12, Mike Pattrick wrote:
> > On Wed, Jun 12, 2024 at 9:50 AM Ales Musil  wrote:
> >
> >>
> >>
> >> On Wed, Jun 12, 2024 at 3:32 PM Mike Pattrick  wrote:
> >>
> >>> When compiling with '-fsanitize=address,undefined', the "ovs-ofctl
> >>> ct-flush" test will yield the following undefined behavior flagged
> >>> by UBSan. This patch uses memcpy to move the 128bit value into the
> >>> stack before reading it.
> >>>
> >>> lib/ofp-prop.c:277:14: runtime error: load of misaligned address
> >>> for type 'union ovs_be128', which requires 8 byte alignment
> >>>   ^
> >>> #0 0x7735d4 in ofpprop_parse_u128 lib/ofp-prop.c:277
> >>> #1 0x6c6c83 in ofp_ct_match_decode lib/ofp-ct.c:529
> >>> #2 0x76f3b5 in ofp_print_nxt_ct_flush lib/ofp-print.c:959
> >>> #3 0x76f3b5 in ofp_to_string__ lib/ofp-print.c:1206
> >>> #4 0x76f3b5 in ofp_to_string lib/ofp-print.c:1264
> >>> #5 0x770c0d in ofp_print lib/ofp-print.c:1308
> >>> #6 0x484a9d in ofctl_ofp_print utilities/ovs-ofctl.c:4899
> >>> #7 0x4ddb77 in ovs_cmdl_run_command__ lib/command-line.c:247
> >>> #8 0x47f6b3 in main utilities/ovs-ofctl.c:186
>
> Thanks for cleaning up the trace.  Please, also remove the '#' tags.
> GitHub trats them as references to issues/PRs and that is annoying.
>
> Also, while most of the addresses in the trace are not important and it's
> good to strip or shorten them, the actual address where the memory access
> happened is important here, so we can see what the actual alignment was.
> Was it part of the original error message?  Clang usually provides them,
> not sure about gcc.
>
> >>>
> >>> Signed-off-by: Mike Pattrick 
> >>> ---
> >>>
> >>
> >> Hi Mike,
> >>
> >> this is interesting, do you have an idea why it didn't fail in CI by now?
> >> Also AFAIR the ofprops is supposed to be aligned to 8 bytes so unless the
> >> buffer itself isn't allocated at an address that is not itself 8 bytes
> >> aligned it shouldn't happen. In that case we might actually have a problem
> >> with other sizes.
> >>
> >
> > Report is seen with gcc + ubsan, but not clang + ubsan. It is possible that
> > this is only seen due the test, this warning wasn't seen live.
>
> I agree with Ales on this one.  Properties supposed to be aligned.
> We need to find why they are not.  i.e. is it a property itself or
> something before it.

I took a closer look at this issue. I first ran into it with
"ovs-ofctl ofp-print", but the same error pops up in ovs-vswitchd with
a command like "ovs-ofctl ct-flush br0 labels=1".

The problem is that the put/parse_u128 functions don't add padding
between the 4 byte header and the data. This is something already
taken care of in the 64bit functions.

I'll resubmit with a more correct solution.


Thanks,
Mike

>
> We may need to take similar approach as in commit:
>   a5cc859a4228 ("ofp-actions: Use aligned structures when decoding ofp 
> actions.")
>
> >
> > Cheers,
> > M
> >
> >
> >>
> >>
> >>>  lib/ofp-prop.c | 4 +++-
> >>>  1 file changed, 3 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/lib/ofp-prop.c b/lib/ofp-prop.c
> >>> index 0a685750c..ed6365414 100644
> >>> --- a/lib/ofp-prop.c
> >>> +++ b/lib/ofp-prop.c
> >>> @@ -271,10 +271,12 @@ enum ofperr
> >>>  ofpprop_parse_u128(const struct ofpbuf *property, ovs_u128 *value)
> >>>  {
> >>>  ovs_be128 *p = property->msg;
> >>> +ovs_be128 aligned;
> >>>  if (ofpbuf_msgsize(property) != sizeof *p) {
> >>>  return OFPERR_OFPBPC_BAD_LEN;
> >>>  }
> >>> -*value = ntoh128(*p);
> >>> +memcpy(, p, sizeof aligned);
>
> FWIW, this doesn't actually fix the issue.  At least not in all the
> cases.  Compiler is free to make alignment assumptions based on the
> pointer type, so we can still have unaligned access inside the memcpy.
>
> Best regards, Ilya Maximets.
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/2] Add GSO UDP Offloading feature to OVS Internal Port

2024-06-25 Thread Mike Pattrick
On Tue, Jun 25, 2024 at 4:30 AM echken  wrote:
>
> The OVS internal port does not support UDP fragmentation offloading,
> resulting in large packets sent through the OVS internal port to OVS
> being prematurely fragmented. This increases the total number of packets
> processed in the path from the vport to the OVS bridge output port,
> affecting transmission efficiency.
>
> Signed-off-by: echken 
> ---
>  net/openvswitch/vport-internal_dev.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/net/openvswitch/vport-internal_dev.c 
> b/net/openvswitch/vport-internal_dev.c
> index 74c88a6baa43..c5a72c4dc6fd 100644
> --- a/net/openvswitch/vport-internal_dev.c
> +++ b/net/openvswitch/vport-internal_dev.c
> @@ -110,7 +110,8 @@ static void do_setup(struct net_device *netdev)
>
> netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
>NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
> -  NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;
> +  NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL |
> +  NETIF_F_GSO_UDP | NETIF_F_GSO_UDP_L4;

I'll try testing this out, but preliminarily, NETIF_F_GSO_SOFTWARE
already contains NETIF_F_GSO_UDP_L4.


Thanks,
Mike

>
> netdev->vlan_features = netdev->features;
> netdev->hw_enc_features = netdev->features;
> --
> 2.34.1
>
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [v2] odp-execute: Check IPv4 checksum offload flag in AVX.

2024-06-20 Thread Mike Pattrick
On Mon, Jun 17, 2024 at 10:08 AM Emma Finn  wrote:
>
> The AVX implementation for IPv4 action did not check whether
> the IPv4 checksum offload flag has been set and was incorrectly
> calculating checksums in software. Adding a check to skip AVX
> checksum calculation when offload flags are set.
>
> Signed-off-by: Emma Finn 
> Reported-by: Eelco Chaudron 

This brings things inline with the scalar odp-execute.

Fixes: 5d11c47d3ebe ("userspace: Enable IP checksum offloading by default.")
Acked-by: Mike Pattrick 

Cheers,
M

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] ovs-lib: Recreate db when schema is corrupted.

2024-07-02 Thread Mike Pattrick
Previously ovs-lib would assume a database is valid if the file exists,
however, it is possible for the database file to exist but be unopenable
by ovsdb.

Now existence checks are augmented with schema checksum validation.
Databases with an invalid schema are removed.

Reported-at: https://issues.redhat.com/browse/FDP-689
Reported-by: Ihar Hrachyshka
Signed-off-by: Mike Pattrick 
---
 tests/ovsdb-server.at | 50 +++
 utilities/ovs-lib.in  | 23 
 2 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at
index ce6d32aee..54c1cda97 100644
--- a/tests/ovsdb-server.at
+++ b/tests/ovsdb-server.at
@@ -105,6 +105,56 @@ AT_CHECK([uuidfilt output], [0],
 ]], [])
 AT_CLEANUP
 
+AT_SETUP([truncated database recreated])
+AT_KEYWORDS([ovsdb unix])
+AT_SKIP_IF([test "$IS_WIN32" = "yes"])
+ordinal_schema > schema
+. ovs-lib
+
+dnl Check that DB is recreated when schema corrupted.
+echo 'x' > db
+AT_CHECK([upgrade_db db schema], [0], [stdout], [ignore])
+AT_CHECK([grep -c "db does not exist\|Creating empty database db" stdout], 
[0], [2
+])
+
+AT_DATA([txnfile], [[ovsdb-client transact unix:socket \
+'["ordinals",
+  {"op": "insert",
+   "table": "ordinals",
+   "row": {"number": 1, "name": "one"}}]'
+]])
+AT_CHECK([ovsdb-server --remote=punix:socket db --run="sh txnfile"], [0], 
[stdout], [stderr])
+
+dnl Check that DB is not recreated on corrupted log. This is similar to the 
previous test but
+dnl includes a mid-operation upgrade.
+echo 'xxx' >> db
+AT_CHECK([upgrade_db db schema], [0], [], [ignore])
+
+dnl Validate that the db can now be used.
+AT_DATA([txnfile], [[ovsdb-client transact unix:socket \
+'["ordinals",
+  {"op": "select",
+   "table": "ordinals",
+   "where": []}]'
+]])
+AT_CHECK([ovsdb-server --remote=punix:socket db --run="sh txnfile"], [0], 
[stdout], [stderr])
+AT_CHECK([grep 'syntax error: db: parse error.* in header line "xxx"' stderr], 
[0], [ignore])
+cat stdout >> output
+AT_CHECK([uuidfilt output], [0],
+  
[[[{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<1>"],"name":"one","number":1}]}]
+]], [])
+
+dnl Validate then create and join cluster.
+echo 'x' > db
+AT_CHECK([create_cluster db schema tcp:1.1.1.1: 1000], [0], [stdout], [])
+AT_CHECK([grep -c 'Creating cluster database db' stdout], [0], [1
+])
+echo 'x' > db
+AT_CHECK([join_cluster db schema tcp:1.1.1.1: tcp:2.2.2.2:], [0], 
[stdout], [])
+AT_CHECK([grep -c 'Joining db to cluster' stdout], [0], [1
+])
+AT_CLEANUP
+
 AT_SETUP([truncating database log with bad transaction])
 AT_KEYWORDS([ovsdb server positive unix])
 AT_SKIP_IF([test "$IS_WIN32" = "yes"])
diff --git a/utilities/ovs-lib.in b/utilities/ovs-lib.in
index 7812a94ee..7e580d730 100644
--- a/utilities/ovs-lib.in
+++ b/utilities/ovs-lib.in
@@ -447,12 +447,27 @@ backup_db () {
 action "Backing up database to $backup" cp "$DB_FILE" "$backup" || return 1
 }
 
+validate_db () {
+# Returns 0 if $DB_FILE is present and has a valid schema.
+# Returns 1 if $DB_FILE is not present.
+DB_FILE="$1"
+
+if test ! -e "$DB_FILE"; then
+return 1
+elif ! ovsdb_tool db-cksum "$DB_FILE" >/dev/null 2>&1; then
+rm -f "$DB_FILE"
+return 1
+fi
+
+return 0
+}
+
 upgrade_db () {
 DB_FILE="$1"
 DB_SCHEMA="$2"
 
 schemaver=`ovsdb_tool schema-version "$DB_SCHEMA"`
-if test ! -e "$DB_FILE"; then
+if ! validate_db "$DB_FILE"; then
 log_warning_msg "$DB_FILE does not exist"
 install_dir `dirname $DB_FILE`
 create_db "$DB_FILE" "$DB_SCHEMA"
@@ -513,7 +528,7 @@ create_cluster () {
   election_timer_arg="--election-timer=$ELECTION_TIMER_MS"
 fi
 
-if test ! -e "$DB_FILE"; then
+if ! validate_db "$DB_FILE"; then
 action "Creating cluster database $DB_FILE" ovsdb_tool 
$election_timer_arg create-cluster "$DB_FILE" "$DB_SCHEMA" "$LOCAL_ADDR"
 elif ovsdb_tool db-is-standalone "$DB_FILE"; then
 # Convert standalone database to clustered.
@@ -530,11 +545,11 @@ join_cluster() {
 LOCAL_ADDR="$3"
 REMOTE_ADDR="$4"
 
-if test -e "$DB_FILE" && ovsdb_tool db-is-standalone "$DB_FILE"; then
+if validate_db "$DB_FILE" && ovsdb_tool db-is-standalone "$DB_FILE"; then
 backup_db || return 1
 rm $DB_FILE
 fi
-if test ! -e "$DB_FILE"; then
+if ! validate_db "$DB_FILE"; then
 action "Joining $DB_FILE to cluster" \
ovsdb_tool join-cluster "$DB_FILE" "$SCHEMA_NAME" "$LOCAL_ADDR" 
"$REMOTE_ADDR"
 fi
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] Userspace: Software fallback for UDP encapsulated TCP segmentation.

2024-07-04 Thread Mike Pattrick
On Fri, Jun 21, 2024 at 11:08 AM David Marchand
 wrote:
>
> Hi Mike,
>
> On Thu, Jun 13, 2024 at 4:54 AM Mike Pattrick  wrote:
> >
> > When sending packets that are flagged as requiring segmentation to an
> > interface that doens't support this feature, send the packet to the TSO
> > software fallback instead of dropping it.
> >
> > Signed-off-by: Mike Pattrick 
>
> I started testing this patch (still not finished), I have some
> questions and comments.
>
> >
> > ---
> > v2:
> >  - Fixed udp tunnel length
> >  - Added test that UDP headers are correct
> >  - Split inner and outer ip_id into different counters
> >  - Set tunnel flags in reset_tcp_seg
> >
> > Signed-off-by: Mike Pattrick 
> > ---
> >  lib/dp-packet-gso.c | 89 +
> >  lib/dp-packet.h | 34 
> >  lib/netdev-native-tnl.c |  8 
> >  lib/netdev.c| 37 +++--
> >  tests/system-traffic.at | 87 
> >  5 files changed, 216 insertions(+), 39 deletions(-)
> >
> > diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c
> > index 847685ad9..dc43ad662 100644
> > --- a/lib/dp-packet-gso.c
> > +++ b/lib/dp-packet-gso.c
> > @@ -47,6 +47,8 @@ dp_packet_gso_seg_new(const struct dp_packet *p, size_t 
> > hdr_len,
> >  seg->l2_5_ofs = p->l2_5_ofs;
> >  seg->l3_ofs = p->l3_ofs;
> >  seg->l4_ofs = p->l4_ofs;
> > +seg->inner_l3_ofs = p->inner_l3_ofs;
> > +seg->inner_l4_ofs = p->inner_l4_ofs;
> >
> >  /* The protocol headers remain the same, so preserve hash and mark. */
> >  *dp_packet_rss_ptr(seg) = *dp_packet_rss_ptr(p);
> > @@ -71,7 +73,12 @@ dp_packet_gso_nr_segs(struct dp_packet *p)
> >  const char *data_tail;
> >  const char *data_pos;
> >
> > -data_pos = dp_packet_get_tcp_payload(p);
> > +if (dp_packet_hwol_is_tunnel_vxlan(p) ||
> > +dp_packet_hwol_is_tunnel_geneve(p)) {
> > +data_pos = dp_packet_get_inner_tcp_payload(p);
> > +} else {
> > +data_pos = dp_packet_get_tcp_payload(p);
> > +}
> >  data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p);
> >
> >  return DIV_ROUND_UP(data_tail - data_pos, segsz);
> > @@ -89,14 +96,19 @@ dp_packet_gso(struct dp_packet *p, struct 
> > dp_packet_batch **batches)
> >  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> >  struct dp_packet_batch *curr_batch = *batches;
> >  struct tcp_header *tcp_hdr;
> > +struct udp_header *tnl_hdr;
> >  struct ip_header *ip_hdr;
> > +uint16_t inner_ip_id = 0;
> > +uint16_t outer_ip_id = 0;
> >  struct dp_packet *seg;
> > +const char *data_pos;
> >  uint16_t tcp_offset;
> >  uint16_t tso_segsz;
> >  uint32_t tcp_seq;
> > -uint16_t ip_id;
> > +bool outer_ipv4;
> >  int hdr_len;
> >  int seg_len;
> > +bool tnl;
> >
> >  tso_segsz = dp_packet_get_tso_segsz(p);
> >  if (!tso_segsz) {
> > @@ -105,20 +117,38 @@ dp_packet_gso(struct dp_packet *p, struct 
> > dp_packet_batch **batches)
> >  return false;
> >  }
> >
> > -tcp_hdr = dp_packet_l4(p);
> > -tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
> > -tcp_seq = ntohl(get_16aligned_be32(_hdr->tcp_seq));
> > -hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p))
> > -  + tcp_offset * 4;
> > -ip_id = 0;
> > -if (dp_packet_hwol_is_ipv4(p)) {
> > +if (dp_packet_hwol_is_tunnel_vxlan(p) ||
> > +dp_packet_hwol_is_tunnel_geneve(p)) {
> > +data_pos =  dp_packet_get_inner_tcp_payload(p);
> > +outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p);
> > +tcp_hdr = dp_packet_inner_l4(p);
> > +ip_hdr = dp_packet_inner_l3(p);
> > +tnl = true;
> > +
> > +if (outer_ipv4) {
> > +outer_ip_id = ntohs(((struct ip_header *) 
> > dp_packet_l3(p))->ip_id);
> > +}
> > +if (dp_packet_hwol_is_ipv4(p)) {
> > +inner_ip_id = ntohs(ip_hdr->ip_id);
> > +}
> > +} else {
> > +data_pos = dp_packet_get_tcp_payload(p);
> > +outer_ipv4 = dp_packet_hwol_is_ipv4(p);
> > +tcp_hdr = dp_packet_l4(p);
> >  ip_hdr = dp_packet_l3(p);
> > -ip_id = ntohs(ip_hdr->ip_id);
> > +tnl =

[ovs-dev] [PATCH v6] tunnel: Allow UDP zero checksum with IPv6 tunnels.

2024-07-05 Thread Mike Pattrick
This patch adopts the proposed RFC 6935 by allowing null UDP checksums
even if the tunnel protocol is IPv6. This is already supported by Linux
through the udp6zerocsumtx tunnel option. It is disabled by default and
IPv6 tunnels are flagged as requiring a checksum, but this patch enables
the user to set csum=false on IPv6 tunnels.

Acked-by: Simon Horman 
Signed-off-by: Mike Pattrick 
---
 NEWS  | 11 +++
 lib/netdev-native-tnl.c   |  2 +-
 lib/netdev-vport.c| 17 +++--
 lib/netdev.h  | 18 +-
 ofproto/tunnel.c  | 10 --
 tests/tunnel-push-pop-ipv6.at |  9 +
 tests/tunnel-push-pop.at  |  7 +++
 tests/tunnel.at   |  2 +-
 vswitchd/vswitch.xml  | 12 +---
 9 files changed, 78 insertions(+), 10 deletions(-)

diff --git a/NEWS b/NEWS
index e0359b759..bbf489c3c 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,10 @@ Post-v3.3.0
  * Conntrack now supports 'random' flag for selecting ports in a range
while natting and 'persistent' flag for selection of the IP address
from a range.
+ * IPv6 UDP tunnel encapsulation including Geneve and VXLAN will now
+   honour the csum option.  Configuring the interface with
+   "options:csum=false" now has the same effect as the udp6zerocsumtx
+   option has with Linux kernel UDP tunnels.
- The primary development branch has been renamed from 'master' to 'main'.
  The OVS tree remains hosted on GitHub.
  https://github.com/openvswitch/ovs.git
@@ -16,6 +20,13 @@ Post-v3.3.0
per interface 'options:dpdk-lsc-interrupt' to 'false'.
- Python:
  * Added custom transaction support to the Idl via add_op().
+   - Kernel datapath:
+ * Previously the kernel datapath did not enable UDP checksums by default
+   in IPv6 tunnels.  This behaviour is non-standard, differs from the
+   Linux kernel, and as also different than the userspace datapath.  Now
+   these tunnels will calculate checksums by default and that behaviour can
+   be changed with "options:csum=false" just as with the userspace
+   datapath.
 
 
 v3.3.0 - 16 Feb 2024
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index 0f9f07f44..16c56608d 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -384,7 +384,7 @@ udp_build_header(const struct netdev_tunnel_config *tnl_cfg,
 udp = netdev_tnl_ip_build_header(data, params, IPPROTO_UDP, 0);
 udp->udp_dst = tnl_cfg->dst_port;
 
-if (params->is_ipv6 || params->flow->tunnel.flags & FLOW_TNL_F_CSUM) {
+if (params->flow->tunnel.flags & FLOW_TNL_F_CSUM) {
 /* Write a value in now to mark that we should compute the checksum
  * later. 0x is handy because it is transparent to the
  * calculation. */
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 60caa02fb..234a4ebe1 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -702,7 +702,9 @@ set_tunnel_config(struct netdev *dev_, const struct smap 
*args, char **errp)
 tnl_cfg.dst_port = htons(atoi(node->value));
 } else if (!strcmp(node->key, "csum") && has_csum) {
 if (!strcmp(node->value, "true")) {
-tnl_cfg.csum = true;
+tnl_cfg.csum = NETDEV_TNL_CSUM_ENABLED;
+} else if (!strcmp(node->value, "false")) {
+tnl_cfg.csum = NETDEV_TNL_CSUM_DISABLED;
 }
 } else if (!strcmp(node->key, "seq") && has_seq) {
 if (!strcmp(node->value, "true")) {
@@ -850,6 +852,15 @@ set_tunnel_config(struct netdev *dev_, const struct smap 
*args, char **errp)
 }
 }
 
+/* The default csum state for GRE is special as it does have an optional
+ * checksum but the default configuration isn't correlated with IP version
+ * like UDP tunnels are.  Likewise, tunnels with no checksum at all must be
+ * in this state. */
+if (tnl_cfg.csum == NETDEV_TNL_CSUM_DEFAULT &&
+(!has_csum || strstr(type, "gre"))) {
+tnl_cfg.csum = NETDEV_TNL_DEFAULT_NO_CSUM;
+}
+
 enum tunnel_layers layers = tunnel_supported_layers(type, _cfg);
 const char *full_type = (strcmp(type, "vxlan") ? type
  : (tnl_cfg.exts & (1 << OVS_VXLAN_EXT_GPE)
@@ -1026,8 +1037,10 @@ get_tunnel_config(const struct netdev *dev, struct smap 
*args)
 }
 }
 
-if (tnl_cfg->csum) {
+if (tnl_cfg->csum == NETDEV_TNL_CSUM_ENABLED) {
 smap_add(args, "csum", "true");
+} else if (tnl_cfg->csum == NETDEV_TNL_CSUM_DISABLED) {
+smap_add(args, "csum", "false");
 }
 
 if (tnl_cfg->set_seq) {
diff --git a/lib/netdev.h b/lib/netdev.h
index 67a8486bd.

[ovs-dev] [PATCH v4 1/3] userspace: Adjust segment size on encapsulation.

2024-07-05 Thread Mike Pattrick
When prepending a tunnel header to a packet marked for segmentation, we
need to adjust the segment size. Failure to do so can result in packets
that are larger than the intended MTU post segmentation.

Fixes: 084c8087292c ("userspace: Support VXLAN and GENEVE TSO.")
Signed-off-by: Mike Pattrick 
---
 lib/netdev-native-tnl.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index 0f9f07f44..da0bb6fda 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -161,6 +161,17 @@ netdev_tnl_push_ip_header(struct dp_packet *packet, const 
void *header,
 *ip_tot_size = dp_packet_size(packet) - sizeof (struct eth_header);
 
 memcpy(eth, header, size);
+
+/* The prepended header may cause TSO marked packets to exceed the intended
+ * MTU on segmentation. */
+if (dp_packet_hwol_is_tso(packet)) {
+uint16_t tso_segsz = dp_packet_get_tso_segsz(packet);
+if (tso_segsz > size) {
+tso_segsz -= size;
+dp_packet_set_tso_segsz(packet, tso_segsz);
+}
+}
+
 /* The encapsulated packet has type Ethernet. Adjust dp_packet. */
 packet->packet_type = htonl(PT_ETH);
 dp_packet_reset_offsets(packet);
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 2/3] Userspace: Software fallback for UDP encapsulated TCP segmentation.

2024-07-05 Thread Mike Pattrick
When sending packets that are flagged as requiring segmentation to an
interface that does not support this feature, send the packet to the TSO
software fallback instead of dropping it.

Signed-off-by: Mike Pattrick 
---
v2:
 - Fixed udp tunnel length
 - Added test that UDP headers are correct
 - Split inner and outer ip_id into different counters
 - Set tunnel flags in reset_tcp_seg

v3:
 - Changed logic of netdev_send() to account for NICs that support
 tunnel offload but not checksum offload
 - Adjusted udp tunnel header length during software tso

v4:
 - Moved a bugfix into its own patch
 - Fixed an indentation issue
 - Changed the scope of ip_hdr
Signed-off-by: Mike Pattrick 
---
 lib/dp-packet-gso.c | 90 -
 lib/dp-packet.h | 34 
 lib/netdev.c| 44 ++--
 tests/system-traffic.at | 87 +++
 4 files changed, 214 insertions(+), 41 deletions(-)

diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c
index 847685ad9..04ebb19da 100644
--- a/lib/dp-packet-gso.c
+++ b/lib/dp-packet-gso.c
@@ -47,6 +47,8 @@ dp_packet_gso_seg_new(const struct dp_packet *p, size_t 
hdr_len,
 seg->l2_5_ofs = p->l2_5_ofs;
 seg->l3_ofs = p->l3_ofs;
 seg->l4_ofs = p->l4_ofs;
+seg->inner_l3_ofs = p->inner_l3_ofs;
+seg->inner_l4_ofs = p->inner_l4_ofs;
 
 /* The protocol headers remain the same, so preserve hash and mark. */
 *dp_packet_rss_ptr(seg) = *dp_packet_rss_ptr(p);
@@ -71,7 +73,12 @@ dp_packet_gso_nr_segs(struct dp_packet *p)
 const char *data_tail;
 const char *data_pos;
 
-data_pos = dp_packet_get_tcp_payload(p);
+if (dp_packet_hwol_is_tunnel_vxlan(p) ||
+dp_packet_hwol_is_tunnel_geneve(p)) {
+data_pos = dp_packet_get_inner_tcp_payload(p);
+} else {
+data_pos = dp_packet_get_tcp_payload(p);
+}
 data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p);
 
 return DIV_ROUND_UP(data_tail - data_pos, segsz);
@@ -89,14 +96,16 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch 
**batches)
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 struct dp_packet_batch *curr_batch = *batches;
 struct tcp_header *tcp_hdr;
-struct ip_header *ip_hdr;
+uint16_t inner_ip_id = 0;
+uint16_t outer_ip_id = 0;
 struct dp_packet *seg;
 uint16_t tcp_offset;
 uint16_t tso_segsz;
 uint32_t tcp_seq;
-uint16_t ip_id;
+bool outer_ipv4;
 int hdr_len;
 int seg_len;
+bool tnl;
 
 tso_segsz = dp_packet_get_tso_segsz(p);
 if (!tso_segsz) {
@@ -105,20 +114,37 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch 
**batches)
 return false;
 }
 
-tcp_hdr = dp_packet_l4(p);
+if (dp_packet_hwol_is_tunnel_vxlan(p) ||
+dp_packet_hwol_is_tunnel_geneve(p)) {
+outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p);
+tcp_hdr = dp_packet_inner_l4(p);
+tnl = true;
+
+if (outer_ipv4) {
+outer_ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id);
+}
+if (dp_packet_hwol_is_ipv4(p)) {
+struct ip_header *ip_hdr = dp_packet_inner_l3(p);
+inner_ip_id = ntohs(ip_hdr->ip_id);
+}
+} else {
+outer_ipv4 = dp_packet_hwol_is_ipv4(p);
+tcp_hdr = dp_packet_l4(p);
+tnl = false;
+
+if (outer_ipv4) {
+struct ip_header *ip_hdr = dp_packet_l3(p);
+outer_ip_id = ntohs(ip_hdr->ip_id);
+}
+}
+
 tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
 tcp_seq = ntohl(get_16aligned_be32(_hdr->tcp_seq));
-hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p))
+hdr_len = ((char *) tcp_hdr - (char *) dp_packet_eth(p))
   + tcp_offset * 4;
-ip_id = 0;
-if (dp_packet_hwol_is_ipv4(p)) {
-ip_hdr = dp_packet_l3(p);
-ip_id = ntohs(ip_hdr->ip_id);
-}
-
 const char *data_tail = (char *) dp_packet_tail(p)
 - dp_packet_l2_pad_size(p);
-const char *data_pos = dp_packet_get_tcp_payload(p);
+const char *data_pos = (char *) tcp_hdr + tcp_offset * 4;
 int n_segs = dp_packet_gso_nr_segs(p);
 
 for (int i = 0; i < n_segs; i++) {
@@ -130,14 +156,36 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch 
**batches)
 seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
 data_pos += seg_len;
 
+if (tnl) {
+/* Update tunnel UDP header length. */
+struct udp_header *tnl_hdr;
+
+tnl_hdr = dp_packet_l4(seg);
+tnl_hdr->udp_len = htons(dp_packet_l4_size(seg));
+
+/* Update tunnel inner L3 header. */
+if (dp_packet_hwol_is_ipv4(seg)) {
+struct ip_header *ip_hdr = dp_packet_inner_l3(seg);
+ip_hdr->ip_tot_len = htons(dp_packet_in

[ovs-dev] [PATCH v4 3/3] netdev-dpdk: Re-enable VXLAN/Geneve offload for Intel cards.

2024-07-05 Thread Mike Pattrick
Previously the OVS support for checksum/TSO offloading didn't work well
with some network cards that supported VXLAN/Geneve tunnel TSO but not
outer UDP checksums. Now support for this configuration is improved and
we no longer need to disable the VXLAN/Geneve TSO flags from intel
hardware support flags.

The modification to outer UDP offload is still required pending a future
DPDK release.

Suggested-by: David Marchand 
Signed-off-by: Mike Pattrick 
---
 lib/netdev-dpdk.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 02cef6e45..95a78241d 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1363,8 +1363,6 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
   "net/ice, net/i40e or net/iavf port.",
   netdev_get_name(>up));
 info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
-info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
-info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
 }
 
 if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4 2/3] Userspace: Software fallback for UDP encapsulated TCP segmentation.

2024-07-09 Thread Mike Pattrick
On Tue, Jul 9, 2024 at 5:57 PM Ilya Maximets  wrote:
>
> On 7/5/24 22:44, Mike Pattrick wrote:
> > When sending packets that are flagged as requiring segmentation to an
> > interface that does not support this feature, send the packet to the TSO
> > software fallback instead of dropping it.
> >
> > Signed-off-by: Mike Pattrick 
> > ---
> > v2:
> >  - Fixed udp tunnel length
> >  - Added test that UDP headers are correct
> >  - Split inner and outer ip_id into different counters
> >  - Set tunnel flags in reset_tcp_seg
> >
> > v3:
> >  - Changed logic of netdev_send() to account for NICs that support
> >  tunnel offload but not checksum offload
> >  - Adjusted udp tunnel header length during software tso
> >
> > v4:
> >  - Moved a bugfix into its own patch
> >  - Fixed an indentation issue
> >  - Changed the scope of ip_hdr
> > Signed-off-by: Mike Pattrick 
> > ---
> >  lib/dp-packet-gso.c | 90 -
> >  lib/dp-packet.h | 34 
> >  lib/netdev.c| 44 ++--
> >  tests/system-traffic.at | 87 +++
> >  4 files changed, 214 insertions(+), 41 deletions(-)
>
> Hi, Mike.  Not a full review, but see a few comments below.
>
> > diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> > index 3f1a15445..dd30b6bf5 100644
> > --- a/tests/system-traffic.at
> > +++ b/tests/system-traffic.at
> > @@ -351,6 +351,93 @@ OVS_WAIT_UNTIL([diff -q payload.bin udp_data])
> >  OVS_TRAFFIC_VSWITCHD_STOP
> >  AT_CLEANUP
> >
> > +AT_SETUP([datapath - tcp over vxlan tunnel with software fallback])
> > +AT_SKIP_IF([test $HAVE_NC = no])
> > +AT_SKIP_IF([test $HAVE_TCPDUMP = no])
> > +OVS_CHECK_VXLAN()
> > +
> > +dnl This test is only valid with tso. If the kernel segments the packets, 
> > the
> > +dnl packet lengths in the final test will be different.
> > +m4_ifndef([CHECK_SYSTEM_TSO], [AT_SKIP_IF(:)])
> > +
> > +OVS_TRAFFIC_VSWITCHD_START()
> > +ADD_BR([br-underlay])
> > +
> > +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
> > +AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"])
> > +
> > +ADD_NAMESPACES(at_ns0)
> > +
> > +dnl Set up underlay link from host into the namespace using veth pair.
> > +ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24")
> > +AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"])
> > +AT_CHECK([ip link set dev br-underlay up])
> > +
> > +dnl Test the case where only one side has all checksum and tso offload 
> > disabled.
> > +AT_CHECK([ethtool -K ovs-p0 tso off], [0], [ignore], [ignore])
> > +AT_CHECK([ethtool -K ovs-p0 sg off], [0], [ignore], [ignore])
> > +
> > +dnl Reinitialize.
> > +AT_CHECK([ovs-vsctl del-port ovs-p0])
> > +AT_CHECK([ovs-vsctl add-port br-underlay ovs-p0])
> > +
> > +dnl Set up tunnel endpoints on OVS outside the namespace and with a native
> > +dnl linux device inside the namespace.
> > +ADD_OVS_TUNNEL([vxlan], [br0], [at_vxlan0], [172.31.1.1], [10.1.1.100/24])
> > +ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], 
> > [10.1.1.1/24],
> > +  [id 0 dstport 4789])
> > +
> > +dnl First, check the underlay.
> > +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 172.31.1.100 | 
> > FORMAT_PING], [0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +dnl Check that the tunnel is up.
> > +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 10.1.1.100 | 
> > FORMAT_PING], [0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +dnl Start tcpdump to capture the encapsulated packets.
> > +OVS_DAEMONIZE([tcpdump -i ovs-p0 -w p0.pcap], [tcpdump.pid])
> > +sleep 1
>
> Don't sleep - wait.

The sleep here is to make sure that tcpdump has actually started
listening for packets. I found sometimes in github ci the pcap would
be missing the start of the transfer causing a failure. It's also used
in the conntrack tests.

> > +
> > +dnl Initialize the listener before it is needed.
> > +NETNS_DAEMONIZE([at_ns0], [nc -l 10.1.1.1 1234 > data2], [nc.pid])
> > +
> > +dnl Verify that ncat is ready.
> > +OVS_WAIT_UNTIL([NS_EXEC([at_ns0], [netstat -ln | grep :1234])])
> > +
> > +dnl Large TCP transfer aimed towards ovs-p0, which has TSO disabled.
> > +AT_CHECK([dd if=/dev/urandom of=payload.bin bs=6 count=1

Re: [ovs-dev] [ovs-dev, v8] [PATCH]ipf: Fix ovs ipf crash.

2024-07-09 Thread Mike Pattrick
On Tue, Jul 9, 2024 at 1:49 AM 赖香武 <15310488...@163.com> wrote:
>
> hi, llya and Michael
>
> Can you take the time to take a look at my patch? Thank you.

Hello,

I wasn't able to reproduce the crash using your method and
unfortunately the stack trace appears to be missing debug symbols.

Looking at the code, I can guess at some ways that would trigger the
crash. Instead of changing the current behaviour on expiration I think
it would be better to just check last_sent_idx before accessing
frag_list[].pkt.

If last_sent_idx != IPF_INVALID_IDX, then functions like
ipf_post_execute_reass_pkts(), ipf_process_frag(), etc can just return
immediately.


Thanks,
M


>
> At 2024-05-28 23:16:46, "Ilya Maximets"  wrote:
> >On 5/23/24 09:40, laixiangwu wrote:
> >> Description:
> >>
> >> when 1) The fragment timeout is between 15 seconds and 25 seconds; 2)
> >> The ipf_list currently has received more than 32 fragments, and there
> >> are other fragments of same big packet that have not been received.
> >>
> >> When the above two scenario conditions are met, due to exceeding the
> >> capacity of the packet batch(here is 32), ipf_dp_packet_batch_add
> >> returns false, and ipf_list will not be cleared. However, the 32
> >> fragments packets added to the packet batch will be processed normally.
> >> When receiving the subsequent fragments of the ipf_list, because the
> >> first 32 fragments have been processed, when processing subsequent
> >> fragment packets, relevant information about the processed fragment
> >> packets will be read,therefore will occur carsh.
> >> One solution is do not forward timeout fragment packets from the above
> >> scenarios, that is, do not add them to the packet batch, and handle
> >> other scenarios according to the original logic.
> >> Signed-off-by: laixiangwu <15310488...@163.com>
> >> ---
> >>  lib/ipf.c | 10 --
> >>  1 file changed, 4 insertions(+), 6 deletions(-)
> >
> >Hi, laixiangwu.  This version of the patch looks the same as the
> >previous one here:
> >  
> > https://patchwork.ozlabs.org/project/openvswitch/patch/20240522021957.2292-1-15310488...@163.com/
> >
> >And I see Mike asked a few questions for the approach there.
> >Could you, please, answer those?
> >
> >For now, I'll mark this patch with 'Changes Requested'.
> >
> >If you plan to send a new version based on Mike's comments, please, add
> >'v6' to the subject prefix, i.e. [PATCH v6], since it's technically a
> >6th version of it.
> >
> >Best regards, Ilya Maximets.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 2/3] Userspace: Software fallback for UDP encapsulated TCP segmentation.

2024-07-12 Thread Mike Pattrick
When sending packets that are flagged as requiring segmentation to an
interface that does not support this feature, send the packet to the TSO
software fallback instead of dropping it.

Signed-off-by: Mike Pattrick 
---
v2:
 - Fixed udp tunnel length
 - Added test that UDP headers are correct
 - Split inner and outer ip_id into different counters
 - Set tunnel flags in reset_tcp_seg

v3:
 - Changed logic of netdev_send() to account for NICs that support
 tunnel offload but not checksum offload
 - Adjusted udp tunnel header length during software tso

v4:
 - Moved a bugfix into its own patch
 - Fixed an indentation issue
 - Changed the scope of ip_hdr

v5:
 - Change order of tso fallback check
 - Added a unit test
Signed-off-by: Mike Pattrick 
---
 lib/dp-packet-gso.c |  90 ++
 lib/dp-packet.h |  34 +++
 lib/netdev.c|  44 -
 tests/dpif-netdev.at| 202 
 tests/system-traffic.at |  90 ++
 5 files changed, 419 insertions(+), 41 deletions(-)

diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c
index 847685ad9..04ebb19da 100644
--- a/lib/dp-packet-gso.c
+++ b/lib/dp-packet-gso.c
@@ -47,6 +47,8 @@ dp_packet_gso_seg_new(const struct dp_packet *p, size_t 
hdr_len,
 seg->l2_5_ofs = p->l2_5_ofs;
 seg->l3_ofs = p->l3_ofs;
 seg->l4_ofs = p->l4_ofs;
+seg->inner_l3_ofs = p->inner_l3_ofs;
+seg->inner_l4_ofs = p->inner_l4_ofs;
 
 /* The protocol headers remain the same, so preserve hash and mark. */
 *dp_packet_rss_ptr(seg) = *dp_packet_rss_ptr(p);
@@ -71,7 +73,12 @@ dp_packet_gso_nr_segs(struct dp_packet *p)
 const char *data_tail;
 const char *data_pos;
 
-data_pos = dp_packet_get_tcp_payload(p);
+if (dp_packet_hwol_is_tunnel_vxlan(p) ||
+dp_packet_hwol_is_tunnel_geneve(p)) {
+data_pos = dp_packet_get_inner_tcp_payload(p);
+} else {
+data_pos = dp_packet_get_tcp_payload(p);
+}
 data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p);
 
 return DIV_ROUND_UP(data_tail - data_pos, segsz);
@@ -89,14 +96,16 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch 
**batches)
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 struct dp_packet_batch *curr_batch = *batches;
 struct tcp_header *tcp_hdr;
-struct ip_header *ip_hdr;
+uint16_t inner_ip_id = 0;
+uint16_t outer_ip_id = 0;
 struct dp_packet *seg;
 uint16_t tcp_offset;
 uint16_t tso_segsz;
 uint32_t tcp_seq;
-uint16_t ip_id;
+bool outer_ipv4;
 int hdr_len;
 int seg_len;
+bool tnl;
 
 tso_segsz = dp_packet_get_tso_segsz(p);
 if (!tso_segsz) {
@@ -105,20 +114,37 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch 
**batches)
 return false;
 }
 
-tcp_hdr = dp_packet_l4(p);
+if (dp_packet_hwol_is_tunnel_vxlan(p) ||
+dp_packet_hwol_is_tunnel_geneve(p)) {
+outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p);
+tcp_hdr = dp_packet_inner_l4(p);
+tnl = true;
+
+if (outer_ipv4) {
+outer_ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id);
+}
+if (dp_packet_hwol_is_ipv4(p)) {
+struct ip_header *ip_hdr = dp_packet_inner_l3(p);
+inner_ip_id = ntohs(ip_hdr->ip_id);
+}
+} else {
+outer_ipv4 = dp_packet_hwol_is_ipv4(p);
+tcp_hdr = dp_packet_l4(p);
+tnl = false;
+
+if (outer_ipv4) {
+struct ip_header *ip_hdr = dp_packet_l3(p);
+outer_ip_id = ntohs(ip_hdr->ip_id);
+}
+}
+
 tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
 tcp_seq = ntohl(get_16aligned_be32(_hdr->tcp_seq));
-hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p))
+hdr_len = ((char *) tcp_hdr - (char *) dp_packet_eth(p))
   + tcp_offset * 4;
-ip_id = 0;
-if (dp_packet_hwol_is_ipv4(p)) {
-ip_hdr = dp_packet_l3(p);
-ip_id = ntohs(ip_hdr->ip_id);
-}
-
 const char *data_tail = (char *) dp_packet_tail(p)
 - dp_packet_l2_pad_size(p);
-const char *data_pos = dp_packet_get_tcp_payload(p);
+const char *data_pos = (char *) tcp_hdr + tcp_offset * 4;
 int n_segs = dp_packet_gso_nr_segs(p);
 
 for (int i = 0; i < n_segs; i++) {
@@ -130,14 +156,36 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch 
**batches)
 seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
 data_pos += seg_len;
 
+if (tnl) {
+/* Update tunnel UDP header length. */
+struct udp_header *tnl_hdr;
+
+tnl_hdr = dp_packet_l4(seg);
+tnl_hdr->udp_len = htons(dp_packet_l4_size(seg));
+
+/* Update tunnel inner L3 header. */
+if (dp_packet_hwol_is_ipv4(seg)) {
+struct ip_header *ip_hdr = dp_packet_inne

[ovs-dev] [PATCH v5 3/3] netdev-dpdk: Re-enable VXLAN/Geneve offload for Intel cards.

2024-07-12 Thread Mike Pattrick
Previously the OVS support for checksum/TSO offloading didn't work well
with some network cards that supported VXLAN/Geneve tunnel TSO but not
outer UDP checksums. Now support for this configuration is improved and
we no longer need to disable the VXLAN/Geneve TSO flags from intel
hardware support flags.

The modification to outer UDP offload is still required pending a future
DPDK release.

Suggested-by: David Marchand 
Signed-off-by: Mike Pattrick 
---
 lib/netdev-dpdk.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 02cef6e45..95a78241d 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1363,8 +1363,6 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
   "net/ice, net/i40e or net/iavf port.",
   netdev_get_name(>up));
 info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
-info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
-info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
 }
 
 if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 1/3] userspace: Adjust segment size on encapsulation.

2024-07-12 Thread Mike Pattrick
When prepending a tunnel header to a packet marked for segmentation, we
need to adjust the segment size. Failure to do so can result in packets
that are larger than the intended MTU post segmentation.

Fixes: 084c8087292c ("userspace: Support VXLAN and GENEVE TSO.")
Signed-off-by: Mike Pattrick 
---
 lib/netdev-native-tnl.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index 16c56608d..78c91b5fa 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -161,6 +161,17 @@ netdev_tnl_push_ip_header(struct dp_packet *packet, const 
void *header,
 *ip_tot_size = dp_packet_size(packet) - sizeof (struct eth_header);
 
 memcpy(eth, header, size);
+
+/* The prepended header may cause TSO marked packets to exceed the intended
+ * MTU on segmentation. */
+if (dp_packet_hwol_is_tso(packet)) {
+uint16_t tso_segsz = dp_packet_get_tso_segsz(packet);
+if (tso_segsz > size) {
+tso_segsz -= size;
+dp_packet_set_tso_segsz(packet, tso_segsz);
+}
+}
+
 /* The encapsulated packet has type Ethernet. Adjust dp_packet. */
 packet->packet_type = htonl(PT_ETH);
 dp_packet_reset_offsets(packet);
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v1 1/2] ovs-monitor-ipsec: LibreSwan autodetect verion.

2024-06-27 Thread Mike Pattrick
Previously a change was made to LibreSwan necessitating the detection
of version numbers. However, this change didn't properly account for all
possible output from "ipsec version". When installed from the git
repository, LibreSwan will report versions differently then when
installed from a package.

Fixes: 3ddb31f60487 ("ovs-monitor-ipsec: LibreSwan autodetect paths.")
Signed-off-by: Mike Pattrick 
---
 ipsec/ovs-monitor-ipsec.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in
index bc7ac5523..2b602c75f 100755
--- a/ipsec/ovs-monitor-ipsec.in
+++ b/ipsec/ovs-monitor-ipsec.in
@@ -464,7 +464,7 @@ conn prevent_unencrypted_vxlan
 encoding="latin1")
 pout, perr = proc.communicate()
 
-v = re.match("^Libreswan (.*)$", pout)
+v = re.match("^Libreswan v?(.*)$", pout)
 try:
 version = int(v.group(1).split(".")[0])
 except:
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v1 2/2] ovs-monitor-ipsec: LibreSwan v5 support.

2024-06-27 Thread Mike Pattrick
In version 5, LibreSwan made significant command line interface changes.
This includes changing the order or command line parameters and removing
the "ipsec auto" command.

To maintain compatibility with previous versions, the ipsec.d version
check is repurposed for this. Checking the version proved simpler than
removing use of auto.

There was also a change to ipsec status command that effected the tests.
However, this change was backwards compatible.

Reported-at: https://issues.redhat.com/browse/FDP-645
Reported-by: Ilya Maximets 
Signed-off-by: Mike Pattrick 
---
 ipsec/ovs-monitor-ipsec.in | 46 +-
 tests/system-ipsec.at  |  8 +++
 2 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in
index 2b602c75f..37c509ac6 100755
--- a/ipsec/ovs-monitor-ipsec.in
+++ b/ipsec/ovs-monitor-ipsec.in
@@ -459,6 +459,7 @@ conn prevent_unencrypted_vxlan
 def __init__(self, libreswan_root_prefix, args):
 # Collect version infromation
 self.IPSEC = libreswan_root_prefix + "/usr/sbin/ipsec"
+self.IPSEC_AUTO = [self.IPSEC]
 proc = subprocess.Popen([self.IPSEC, "--version"],
 stdout=subprocess.PIPE,
 encoding="latin1")
@@ -470,6 +471,11 @@ conn prevent_unencrypted_vxlan
 except:
 version = 0
 
+if version < 5:
+# With v5, LibreSWAN removed the auto command, however, it is
+# still required for older versions
+self.IPSEC_AUTO.append("auto")
+
 if version >= 4:
 ipsec_d = args.ipsec_d if args.ipsec_d else "/var/lib/ipsec/nss"
 else:
@@ -593,7 +599,7 @@ conn prevent_unencrypted_vxlan
 
 def refresh(self, monitor):
 vlog.info("Refreshing LibreSwan configuration")
-subprocess.call([self.IPSEC, "auto", "--ctlsocket", self.IPSEC_CTL,
+subprocess.call(self.IPSEC_AUTO + ["--ctlsocket", self.IPSEC_CTL,
 "--config", self.IPSEC_CONF, "--rereadsecrets"])
 tunnels = set(monitor.tunnels.keys())
 
@@ -621,7 +627,7 @@ conn prevent_unencrypted_vxlan
 
 if not tunnel or tunnel.version != ver:
 vlog.info("%s is outdated %u" % (conn, ver))
-subprocess.call([self.IPSEC, "auto", "--ctlsocket",
+subprocess.call(self.IPSEC_AUTO + ["--ctlsocket",
 self.IPSEC_CTL, "--config",
 self.IPSEC_CONF, "--delete", conn])
 elif ifname in tunnels:
@@ -643,44 +649,44 @@ conn prevent_unencrypted_vxlan
 # Update shunt policy if changed
 if monitor.conf_in_use["skb_mark"] != monitor.conf["skb_mark"]:
 if monitor.conf["skb_mark"]:
-subprocess.call([self.IPSEC, "auto",
-"--config", self.IPSEC_CONF,
+subprocess.call(self.IPSEC_AUTO +
+["--config", self.IPSEC_CONF,
 "--ctlsocket", self.IPSEC_CTL,
 "--add",
 "--asynchronous", "prevent_unencrypted_gre"])
-subprocess.call([self.IPSEC, "auto",
-"--config", self.IPSEC_CONF,
+subprocess.call(self.IPSEC_AUTO +
+["--config", self.IPSEC_CONF,
 "--ctlsocket", self.IPSEC_CTL,
 "--add",
 "--asynchronous", "prevent_unencrypted_geneve"])
-subprocess.call([self.IPSEC, "auto",
-"--config", self.IPSEC_CONF,
+subprocess.call(self.IPSEC_AUTO +
+["--config", self.IPSEC_CONF,
 "--ctlsocket", self.IPSEC_CTL,
 "--add",
 "--asynchronous", "prevent_unencrypted_stt"])
-subprocess.call([self.IPSEC, "auto",
-"--config", self.IPSEC_CONF,
+subprocess.call(self.IPSEC_AUTO +
+["--config", self.IPSEC_CONF,
 "--ctlsocket", self.IPSEC_CTL,
 "--add",
 "--asynchronous", "prevent_unencrypted_vxlan"])
 else:
-   

Re: [ovs-dev] [PATCH v4] ofproto: Add upcall/dump-ufid-rules command to map UFIDs to OpenFlow.

2024-06-27 Thread Mike Pattrick
On Thu, Jun 27, 2024 at 4:13 PM Ilya Maximets  wrote:
>
> On 6/18/24 21:41, Dumitru Ceara wrote:
> > It improves the debugging experience if we can easily get a list of
> > OpenFlow rules and groups that contribute to the creation of a datapath
> > flow.
> >
> > The suggested workflow is:
> > a. dump datapath flows (along with UUIDs), this also prints the core IDs
> > (PMD IDs) when applicable.
> >
> >   $ ovs-appctl dpctl/dump-flows -m
> >   flow-dump from pmd on cpu core: 7
> >   ufid:7460db8f..., recirc_id(0), 
> >
> > b. dump related OpenFlow rules and groups:
> >   $ ovs-appctl upcall/dump-ufid-rules ufid:7460db8f... pmd=7
> >   cookie=0x12345678, table=0 
> > priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
> >   cookie=0x0, table=1 priority=200,actions=group:1
> >   group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
> >   cookie=0x0, table=2 actions=output:1
>
> Hi, Dumitru.  Thanks for the patch!
>
> This is definitely an interesting debug interface.  See some comments inline.
>
>
> As an idea, should we call it ofproto/detrace ?
>
> >
> > The new command only shows rules and groups attached to ukeys that are
> > in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
> > other ukeys should not be relevant for the use case presented above.
> >
> > For ukeys that don't have an xcache populated yet, the command goes
> > ahead and populates one.  In theory this is creates a slight overhead as
> > those ukeys might not need an xcache until they see traffic (and get
> > revalidated) but in practice the overhead should be minimal.
>
> The flow dumped for the first time should always be revalidated,
> even if it didn't see any more traffic.  See the should_revalidate()
> function.  This means that there is only very narrow window between
> the flow installation and the first revalidation, so the cache should
> be availabel in all practical cases.
>
> Kepeing that in mind, I don't think that populating xcache from the
> debug appctl is a good idea.  We're holding the mutex for a long time
> while effectively revalidating the ukey from the appctl and more
> importantly we're revalidating it with side effects allowed.  This
> means we can learn some rules or update some other caches or even
> emit some packets.  We should not do that from the debug appctl.
>
> I suggest to avoid this and just return early if the cache is not
> yet available.  It should be available in most practical situations.
>
> >
> > This commit tries to mimic the output format of the ovs-ofctl
> > dump-flows/dump-groups commands.  For groups it actually uses
> > ofputil_group/_bucket functions for formatting.  For rules it uses
> > flow_stats_ds() (the original function was exported and renamed to
> > ofproto_rule_stats_ds()).
> >
> > Signed-off-by: Dumitru Ceara 
> > ---
> > V4:
> > - Addressed Mike's comment:
> >   - use predictable port IDs.
> > V3:
> > - Addressed Mike's comments:
> >   - use ds_put_char()
> >   - make the test less flaky
> > V2:
> > - Addressed Adrian's comments:
> >   - check return value of populate_xcache()
> >   - use flow_stats_ds() (renamed to ofproto_rule_stats_ds()) instead of
> > custom printing
> >   - move ukey->state check to caller
> >   - handle case when group bucket is not available
> >   - update test case to cover the point above
> > ---
> >  NEWS|   3 +
> >  include/openvswitch/ofp-group.h |   7 ++
> >  lib/ofp-group.c | 110 +++-
> >  ofproto/ofproto-dpif-upcall.c   |  85 
> >  ofproto/ofproto-dpif.c  |  24 +++
> >  ofproto/ofproto-dpif.h  |   2 +
> >  ofproto/ofproto-provider.h  |   1 +
> >  ofproto/ofproto.c   |  85 
> >  tests/ofproto-dpif.at   |  47 ++
> >  tests/ofproto-macros.at |   9 ++-
> >  10 files changed, 285 insertions(+), 88 deletions(-)
> >
> > diff --git a/NEWS b/NEWS
> > index 5ae0108d552b..1bc085f97045 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -9,6 +9,9 @@ Post-v3.3.0
> >   https://github.com/openvswitch/ovs.git
> > - DPDK:
> >   * OVS validated with DPDK 23.11.1.
> > +   - ovs-appctl:
> > + * Added 'upcall/dump-ufid-rules' to output the set of OpenFlow rules 
> > and
> > +   groups that contributed to the creation of a specific datapath flow.
>
> I'd move this to the top, alpabetically.  I know that some entries
> here are not in order, but it's better to keep them at least in
> approximate order.
>
> >
> >
> >  v3.3.0 - 16 Feb 2024
> > diff --git a/include/openvswitch/ofp-group.h 
> > b/include/openvswitch/ofp-group.h
> > index cd7af0ebff9c..79fcb3a4c0d1 100644
> > --- a/include/openvswitch/ofp-group.h
> > +++ b/include/openvswitch/ofp-group.h
> > @@ -70,6 +70,11 @@ struct ofputil_bucket *ofputil_bucket_find(const struct 
> > ovs_list *,
> >  bool ofputil_bucket_check_duplicate_id(const struct ovs_list *);
> >  

Re: [ovs-dev] [PATCH v2] Userspace: Software fallback for UDP encapsulated TCP segmentation.

2024-06-28 Thread Mike Pattrick
On Fri, Jun 28, 2024 at 4:50 AM David Marchand
 wrote:
>
> On Thu, Jun 27, 2024 at 4:04 PM David Marchand
>  wrote:
> > @@ -926,6 +926,17 @@ netdev_send(struct netdev *netdev, int qid,
> > struct dp_packet_batch *batch,
> >  return netdev_send_tso(netdev, qid, batch, 
> > concurrent_txq);
> >  }
> >  }
> > +} else if (!(netdev_flags & NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM)) {
> > +DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> > +if (!dp_packet_hwol_is_tso(packet) &&
> > +!dp_packet_hwol_is_tunnel_vxlan(packet) &&
> > +!dp_packet_hwol_is_tunnel_geneve(packet)) {
> > +continue;
>
> Erm, less buggy:

Thanks for the review David! These are some good ideas. I'll send in a
v3 with them included.


Cheers,
M

>
> +if (!dp_packet_hwol_is_tso(packet) ||
> +(!dp_packet_hwol_is_tunnel_vxlan(packet) &&
> + !dp_packet_hwol_is_tunnel_geneve(packet))) {
> +continue;
> +}
>
> > +}
> > +if (dp_packet_hwol_is_outer_udp_cksum(packet)) {
> > +return netdev_send_tso(netdev, qid, batch, 
> > concurrent_txq);
> > +}
> > +}
> >  }
> >  }
>
>
> --
> David Marchand
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v10 2/2] ofproto-dpif-mirror: Add support for pre-selection filter.

2024-07-08 Thread Mike Pattrick
Currently a bridge mirror will collect all packets and tools like
ovs-tcpdump can apply additional filters after they have already been
duplicated by vswitchd. This can result in inefficient collection.

This patch adds support to apply pre-selection to bridge mirrors, which
can limit which packets are mirrored based on flow metadata. This
significantly improves overall vswitchd performance during mirroring if
only a subset of traffic is required.

Signed-off-by: Mike Pattrick 
---
v8:
 - Corrected code from v7 related to sequence and in_port. Mirrors
   reject filters with an in_port set as this could cause confusion.
 - Combined ovsrcu pointers into a new struct, minimatch wasn't used
   because the minimatch_* functions didn't fit the usage here.
 - Added a test to check for modifying filters when partially
   overlapping flows already exist.
 - Corrected documentation.
v9:
 - Explicitly cleared mirror_config.filter* when not set
v10:
 - Changed rcu memory order
 - Updated documentation to refer to packets instead of flows
 - Updated comments
---
 Documentation/ref/ovs-tcpdump.8.rst |   8 +-
 NEWS|   6 +
 lib/flow.h  |   9 ++
 ofproto/ofproto-dpif-mirror.c   | 106 +-
 ofproto/ofproto-dpif-mirror.h   |   8 +-
 ofproto/ofproto-dpif-xlate.c|  15 ++-
 ofproto/ofproto-dpif.c  |  12 +-
 ofproto/ofproto.h   |   3 +
 tests/ofproto-dpif.at   | 167 
 utilities/ovs-tcpdump.in|  13 ++-
 vswitchd/bridge.c   |  13 ++-
 vswitchd/vswitch.ovsschema  |   7 +-
 vswitchd/vswitch.xml|  16 +++
 13 files changed, 368 insertions(+), 15 deletions(-)

diff --git a/Documentation/ref/ovs-tcpdump.8.rst 
b/Documentation/ref/ovs-tcpdump.8.rst
index b9f8cdf6f..e7bd5e9e4 100644
--- a/Documentation/ref/ovs-tcpdump.8.rst
+++ b/Documentation/ref/ovs-tcpdump.8.rst
@@ -61,8 +61,14 @@ Options
 
   If specified, mirror all ports (optional).
 
+* ``--filter ``
+
+  If specified, only mirror packets that match the provided OpenFlow filter.
+  The available fields are documented in ``ovs-fields(7)``.
+
 See Also
 
 
 ``ovs-appctl(8)``, ``ovs-vswitchd(8)``, ``ovs-pcap(1)``,
-``ovs-tcpundump(1)``, ``tcpdump(8)``, ``wireshark(8)``.
+``ovs-fields(7)``, ``ovs-tcpundump(1)``, ``tcpdump(8)``,
+``wireshark(8)``.
diff --git a/NEWS b/NEWS
index e0359b759..cbe777a71 100644
--- a/NEWS
+++ b/NEWS
@@ -16,6 +16,12 @@ Post-v3.3.0
per interface 'options:dpdk-lsc-interrupt' to 'false'.
- Python:
  * Added custom transaction support to the Idl via add_op().
+   - ovs-vsctl:
+ * Added a new filter column in the Mirror table which can be used to
+   apply filters to mirror ports.
+   - ovs-tcpdump:
+ * Added command line parameter --filter to enable filtering the packets
+   that are captured by tcpdump.
 
 
 v3.3.0 - 16 Feb 2024
diff --git a/lib/flow.h b/lib/flow.h
index 75a9be3c1..60ec4b0d7 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -939,6 +939,15 @@ flow_union_with_miniflow(struct flow *dst, const struct 
miniflow *src)
 flow_union_with_miniflow_subset(dst, src, src->map);
 }
 
+/* Perform a bitwise OR of minimask 'src' mask data with the equivalent
+ * fields in 'dst', storing the result in 'dst'. */
+static inline void
+flow_wildcards_union_with_minimask(struct flow_wildcards *dst,
+   const struct minimask *src)
+{
+flow_union_with_miniflow_subset(>masks, >masks, src->masks.map);
+}
+
 static inline bool is_ct_valid(const struct flow *flow,
const struct flow_wildcards *mask,
struct flow_wildcards *wc)
diff --git a/ofproto/ofproto-dpif-mirror.c b/ofproto/ofproto-dpif-mirror.c
index 4967ecc9a..a14f85843 100644
--- a/ofproto/ofproto-dpif-mirror.c
+++ b/ofproto/ofproto-dpif-mirror.c
@@ -21,6 +21,7 @@
 #include "cmap.h"
 #include "hmapx.h"
 #include "ofproto.h"
+#include "ofproto-dpif-trace.h"
 #include "vlan-bitmap.h"
 #include "openvswitch/vlog.h"
 
@@ -48,6 +49,11 @@ struct mbundle {
 mirror_mask_t mirror_out;   /* Mirrors that output to this mbundle. */
 };
 
+struct filtermask {
+struct miniflow *flow;
+struct minimask *mask;
+};
+
 struct mirror {
 struct mbridge *mbridge;/* Owning ofproto. */
 size_t idx; /* In ofproto's "mirrors" array. */
@@ -57,6 +63,10 @@ struct mirror {
 struct hmapx srcs;  /* Contains "struct mbundle*"s. */
 struct hmapx dsts;  /* Contains "struct mbundle*"s. */
 
+/* Filter criteria. */
+OVSRCU_TYPE(struct filtermask *) filter_mask;
+char *filter_str;
+
 /* This is accessed by handler threads assuming RCU protection (see
  * mirror_get()), but can be manipulated by mirror_set() without any
  * ex

[ovs-dev] [PATCH v10 1/2] ofproto-dpif-mirror: Reduce number of function parameters.

2024-07-08 Thread Mike Pattrick
Previously the mirror_set() and mirror_get() functions took a large
number of parameters, which was inefficient and difficult to read and
extend. This patch moves most of the parameters into a struct.

Acked-by: Simon Horman 
Acked-by: Eelco Chaudron 
Signed-off-by: Mike Pattrick 
---
 ofproto/ofproto-dpif-mirror.c | 60 ++-
 ofproto/ofproto-dpif-mirror.h | 40 ++-
 ofproto/ofproto-dpif-xlate.c  | 29 -
 ofproto/ofproto-dpif.c| 23 +++---
 4 files changed, 88 insertions(+), 64 deletions(-)

diff --git a/ofproto/ofproto-dpif-mirror.c b/ofproto/ofproto-dpif-mirror.c
index 343b75f0e..4967ecc9a 100644
--- a/ofproto/ofproto-dpif-mirror.c
+++ b/ofproto/ofproto-dpif-mirror.c
@@ -207,19 +207,22 @@ mirror_bundle_dst(struct mbridge *mbridge, struct 
ofbundle *ofbundle)
 }
 
 int
-mirror_set(struct mbridge *mbridge, void *aux, const char *name,
-   struct ofbundle **srcs, size_t n_srcs,
-   struct ofbundle **dsts, size_t n_dsts,
-   unsigned long *src_vlans, struct ofbundle *out_bundle,
-   uint16_t snaplen,
-   uint16_t out_vlan)
+mirror_set(struct mbridge *mbridge, void *aux,
+   const struct ofproto_mirror_settings *ms,
+   const struct mirror_bundles *mb)
 {
 struct mbundle *mbundle, *out;
 mirror_mask_t mirror_bit;
 struct mirror *mirror;
 struct hmapx srcs_map;  /* Contains "struct ofbundle *"s. */
 struct hmapx dsts_map;  /* Contains "struct ofbundle *"s. */
+uint16_t out_vlan;
 
+if (!ms || !mbridge) {
+return EINVAL;
+}
+
+out_vlan = ms->out_vlan;
 mirror = mirror_lookup(mbridge, aux);
 if (!mirror) {
 int idx;
@@ -227,7 +230,7 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 idx = mirror_scan(mbridge);
 if (idx < 0) {
 VLOG_WARN("maximum of %d port mirrors reached, cannot create %s",
-  MAX_MIRRORS, name);
+  MAX_MIRRORS, ms->name);
 return EFBIG;
 }
 
@@ -242,8 +245,8 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 unsigned long *vlans = ovsrcu_get(unsigned long *, >vlans);
 
 /* Get the new configuration. */
-if (out_bundle) {
-out = mbundle_lookup(mbridge, out_bundle);
+if (mb->out_bundle) {
+out = mbundle_lookup(mbridge, mb->out_bundle);
 if (!out) {
 mirror_destroy(mbridge, mirror->aux);
 return EINVAL;
@@ -252,16 +255,16 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 } else {
 out = NULL;
 }
-mbundle_lookup_multiple(mbridge, srcs, n_srcs, _map);
-mbundle_lookup_multiple(mbridge, dsts, n_dsts, _map);
+mbundle_lookup_multiple(mbridge, mb->srcs, mb->n_srcs, _map);
+mbundle_lookup_multiple(mbridge, mb->dsts, mb->n_dsts, _map);
 
 /* If the configuration has not changed, do nothing. */
 if (hmapx_equals(_map, >srcs)
 && hmapx_equals(_map, >dsts)
-&& vlan_bitmap_equal(vlans, src_vlans)
+&& vlan_bitmap_equal(vlans, ms->src_vlans)
 && mirror->out == out
 && mirror->out_vlan == out_vlan
-&& mirror->snaplen == snaplen)
+&& mirror->snaplen == ms->snaplen)
 {
 hmapx_destroy(_map);
 hmapx_destroy(_map);
@@ -275,15 +278,15 @@ mirror_set(struct mbridge *mbridge, void *aux, const char 
*name,
 hmapx_swap(_map, >dsts);
 hmapx_destroy(_map);
 
-if (vlans || src_vlans) {
+if (vlans || ms->src_vlans) {
 ovsrcu_postpone(free, vlans);
-vlans = vlan_bitmap_clone(src_vlans);
+vlans = vlan_bitmap_clone(ms->src_vlans);
 ovsrcu_set(>vlans, vlans);
 }
 
 mirror->out = out;
 mirror->out_vlan = out_vlan;
-mirror->snaplen = snaplen;
+mirror->snaplen = ms->snaplen;
 
 /* Update mbundles. */
 mirror_bit = MIRROR_MASK_C(1) << mirror->idx;
@@ -406,23 +409,22 @@ mirror_update_stats(struct mbridge *mbridge, 
mirror_mask_t mirrors,
 /* Retrieves the mirror numbered 'index' in 'mbridge'.  Returns true if such a
  * mirror exists, false otherwise.
  *
- * If successful, '*vlans' receives the mirror's VLAN membership information,
+ * If successful 'mc->vlans' receives the mirror's VLAN membership information,
  * either a null pointer if the mirror includes all VLANs or a 4096-bit bitmap
  * in which a 1-bit indicates that the mirror includes a particular VLAN,
- * '*dup_mirrors' receives a bitmap of mirrors whose output duplicates mirror
- * 'index', '*out' receives the output ofbundle (if any), and '*out_vlan'
- * receives the output VLAN (if any).
+ * 'mc->dup_mirrors' receives a bitmap of mirrors whose output duplicates
+ * mirror 'index

Re: [ovs-dev] [PATCH v2] ovsdb: Use table indexes if available for ovsdb_query().

2024-06-19 Thread Mike Pattrick
On Wed, Jun 19, 2024 at 8:32 AM Simon Horman  wrote:
>
> On Mon, Jun 17, 2024 at 01:11:28PM -0400, Mike Pattrick wrote:
> > On Mon, Jun 3, 2024 at 2:01 PM Ilya Maximets  wrote:
> > >
> > > On 6/3/24 06:20, Mike Pattrick wrote:
> > > > Currently all OVSDB database queries except for UUID lookups all result
> > > > in linear lookups over the entire table, even if an index is present.
> > > >
> > > > This patch modifies ovsdb_query() to attempt an index lookup first, if
> > > > possible. If no matching indexes are present then a linear index is
> > > > still conducted.
> > > >
> > > > Reported-at: https://issues.redhat.com/browse/FDP-590
> > > > Signed-off-by: Mike Pattrick 
> > > > ---
> > > >  NEWS |   3 ++
> > > >  ovsdb/query.c| 102 +++
> > > >  ovsdb/row.h  |  28 +++
> > > >  ovsdb/transaction.c  |  27 ---
> > > >  tests/ovsdb-execution.at |  34 -
> > > >  tests/ovsdb-server.at|   2 +-
> > > >  tests/ovsdb-tool.at  |   2 +-
> > > >  7 files changed, 159 insertions(+), 39 deletions(-)
> > >
> > > Hi, Mike.  Thanks for the patch.
> > >
> > > Besides what Simon asked, the patch has a few other issues:
> > >
> > > 1. Lookup is performed only on the committed index and it doesn't include
> > >rows that are in-flight in the current transaction.
> > >
> > >Unlike rows in a hash table, indexes are updated only after the whole
> > >transaction is committed.  With this change we'll not be able to find
> > >newly added rows.
> > >
> > >Another thing related to this is that it is allowed to have duplicates
> > >within a transaction as long as they are removed before the transaction
> > >ends.  So it is possible that multiple rows will satisfy the condition
> > >on indexed columns while the transaction is in-flight.
> > >
> > >Consider the following commands executed in a sandbox:
> > >
> > ># ovs-vsctl set-manager "tcp:my-first-target"
> > ># ovsdb-client transact unix:$(pwd)/sandbox/db.sock '
> > >["Open_vSwitch",
> > > {"op": "select",
> > >  "table": "Manager",
> > >  "columns": ["_uuid", "target"],
> > >  "where": [["target", "==", "tcp:my-first-target"]]},
> > > {"op": "insert",
> > >  "table": "Manager",
> > >  "uuid-name": "duplicate",
> > >  "row": {"target": "tcp:my-first-target"}},
> > > {"op": "select",
> > >  "table": "Manager",
> > >  "columns": ["_uuid", "target"],
> > >  "where": [["target", "==", "tcp:my-first-target"]]},
> > > {"op": "delete",
> > >  "table": "Manager",
> > >  "where":[["_uuid","==",["named-uuid","duplicate"]]]},
> > > {"op": "select",
> > >  "table": "Manager",
> > >  "columns": ["_uuid", "target"],
> > >  "where": [["target", "==", "tcp:my-first-target"]]}]'
> > >
> > >Transaction must succeed.  The first selection should return 1 row,
> > >the second should return both duplicates and the third should again
> > >return one row.
> >
> > This is a good point, I hadn't anticipated this use-case but it does
> > have a large impact on this change. After working through a few
> > implementations, I wasn't able to find a solution that wasn't overly
> > complex. For the next version, I've instead opted to exclude indexed
> > lookups from transactions that modify the associated row.
> >
> > The next version should address this and the other feedback.
>
> Hi Mike,
>
> My inbox is confusing me somewhat,
> are you planing a v3 of this patch?

I seem to have sent the v3 as v2 by mistake. I will resubmit.

-M

>
> ...
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] ofp-prop: Fix unaligned 128 bit access.

2024-06-19 Thread Mike Pattrick
When compiling with '-fsanitize=address,undefined', the "ovs-ofctl
ct-flush" test will yield the following undefined behavior flagged by
UBSan. This problem is caused by the fact that 128bit property put/parse
functions weren't adding appropriate padding before writing or reading
the value.

This patch uses get_32aligned_* functions to copy the bytes as they are
aligned.

lib/ofp-prop.c:277:14: runtime error: load of misaligned address
0x6060687c for type 'union ovs_be128', which requires 8 byte
alignment
0x6060687c: note: pointer points here
  00 05 00 14 00 00 00 00  00 00 00 00 00 00 00 00  00 ff ab 00
  ^
0: in ofpprop_parse_u128 lib/ofp-prop.c:277
1: in ofp_ct_match_decode lib/ofp-ct.c:525
2: in ofp_print_nxt_ct_flush lib/ofp-print.c:959
3: in ofp_to_string__ lib/ofp-print.c:1206
4: in ofp_to_string lib/ofp-print.c:1264
5: in ofp_print lib/ofp-print.c:1308
6: in ofctl_ofp_print utilities/ovs-ofctl.c:4899
7: in ovs_cmdl_run_command__ lib/command-line.c:247
8: in ovs_cmdl_run_command lib/command-line.c:278
9: in main utilities/ovs-ofctl.c:186

Signed-off-by: Mike Pattrick 
---
v2: removed memcpy
---
 lib/ofp-prop.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/lib/ofp-prop.c b/lib/ofp-prop.c
index 0a685750c..842ecebea 100644
--- a/lib/ofp-prop.c
+++ b/lib/ofp-prop.c
@@ -21,6 +21,7 @@
 #include "openvswitch/ofp-errors.h"
 #include "openvswitch/ofp-prop.h"
 #include "openvswitch/vlog.h"
+#include "unaligned.h"
 #include "util.h"
 #include "uuid.h"
 
@@ -190,11 +191,12 @@ ofpprop_parse_be64(const struct ofpbuf *property, 
ovs_be64 *value)
 enum ofperr
 ofpprop_parse_be128(const struct ofpbuf *property, ovs_be128 *value)
 {
-ovs_be128 *p = property->msg;
+ovs_32aligned_be128 *p = property->msg;
+
 if (ofpbuf_msgsize(property) != sizeof *p) {
 return OFPERR_OFPBPC_BAD_LEN;
 }
-*value = *p;
+*value = get_32aligned_be128(p);
 return 0;
 }
 
@@ -270,12 +272,13 @@ ofpprop_parse_u64(const struct ofpbuf *property, uint64_t 
*value)
 enum ofperr
 ofpprop_parse_u128(const struct ofpbuf *property, ovs_u128 *value)
 {
-ovs_be128 *p = property->msg;
-if (ofpbuf_msgsize(property) != sizeof *p) {
-return OFPERR_OFPBPC_BAD_LEN;
+enum ofperr error = ofpprop_parse_be128(property, (ovs_be128 *)value);
+
+if (!error) {
+*value = ntoh128(*(ovs_be128 *)value);
 }
-*value = ntoh128(*p);
-return 0;
+
+return error;
 }
 
 /* Attempts to parse 'property' as a property containing a UUID.  If
-- 
2.39.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3] ovsdb: Use table indexes if available for ovsdb_query().

2024-06-19 Thread Mike Pattrick
Currently all OVSDB database queries except for UUID lookups all result
in linear lookups over the entire table, even if an index is present.

This patch modifies ovsdb_query() to attempt an index lookup first, if
possible. If no matching indexes are present then a linear index is
still conducted.

To test this, I set up an ovsdb database with a variable number of rows
and timed the average of how long ovsdb-client took to query a single
row. The first two tests involved a linear scan that didn't match any
rows, so there was no overhead associated with sending or encoding
output. The post-patch linear scan was a worst case scenario where the
table did have an appropriate index but the conditions made its usage
impossible. The indexed lookup test was for a matching row, which did
also include overhead associated with a match. The results are included
in the table below.

Rows   | 100k | 200k | 300k | 400k | 500k
---+--+--+--+--+-
Pre-patch linear scan  |  9ms | 24ms | 37ms | 49ms | 61ms
Post-patch linear scan |  9ms | 24ms | 38ms | 49ms | 61ms
Indexed lookup |  3ms |  3ms |  3ms |  3ms |  3ms

I also tested the performance of ovsdb_query() by wrapping it in a loop
and measuring the time it took to perform 1000 linear scans on 1, 10,
100k, and 200k rows. This test showed that the new index checking code
did not slow down worst case lookups to a statistically detectable
degree.

Reported-at: https://issues.redhat.com/browse/FDP-590
Signed-off-by: Mike Pattrick 

---

v3:
 - Included txn in index code
 - Added benchmarks
 - Refactored code
 - Added more tests
 - Now a mock row is created to perform the search with standard
 functions
Signed-off-by: Mike Pattrick 
---
 ovsdb/execution.c|  20 +++--
 ovsdb/query.c| 174 +++
 ovsdb/query.h|   6 +-
 ovsdb/rbac.c |  15 ++--
 ovsdb/rbac.h |  10 ++-
 ovsdb/row.h  |  28 +++
 ovsdb/transaction.c  |  29 +--
 ovsdb/transaction.h  |   5 ++
 tests/ovsdb-execution.at | 108 +++-
 tests/ovsdb-macros.at|  10 +++
 tests/ovsdb-query.at |  18 ++--
 tests/ovsdb-server.at|   2 +-
 tests/ovsdb-tool.at  |   2 +-
 tests/test-ovsdb.c   |  15 +++-
 14 files changed, 363 insertions(+), 79 deletions(-)

diff --git a/ovsdb/execution.c b/ovsdb/execution.c
index f4cc9e802..212839bca 100644
--- a/ovsdb/execution.c
+++ b/ovsdb/execution.c
@@ -459,7 +459,7 @@ ovsdb_execute_select(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 if (!error) {
 struct ovsdb_row_set rows = OVSDB_ROW_SET_INITIALIZER;
 
-ovsdb_query_distinct(table, , , );
+ovsdb_query_distinct(table, , , , x->txn);
 ovsdb_row_set_sort(, );
 json_object_put(result, "rows",
 ovsdb_row_set_to_json(, ));
@@ -545,8 +545,8 @@ ovsdb_execute_update(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 ur.row = row;
 ur.columns = 
 if (ovsdb_rbac_update(x->db, table, , , x->role,
-  x->id)) {
-ovsdb_query(table, , update_row_cb, );
+  x->id, x->txn)) {
+ovsdb_query(table, , update_row_cb, , x->txn);
 } else {
 error = ovsdb_perm_error("RBAC rules for client \"%s\" role "
  "\"%s\" prohibit modification of "
@@ -626,7 +626,7 @@ ovsdb_execute_mutate(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 json_integer_create(hmap_count(>rows)));
 } else {
 size_t row_count = 0;
-ovsdb_query(table, , count_row_cb, _count);
+ovsdb_query(table, , count_row_cb, _count, x->txn);
 json_object_put(result, "count",
 json_integer_create(row_count));
 }
@@ -636,8 +636,8 @@ ovsdb_execute_mutate(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 mr.mutations = 
 mr.error = 
 if (ovsdb_rbac_mutate(x->db, table, , , x->role,
-  x->id)) {
-ovsdb_query(table, , mutate_row_cb, );
+  x->id, x->txn)) {
+ovsdb_query(table, , mutate_row_cb, , x->txn);
 } else {
 error = ovsdb_perm_error("RBAC rules for client \"%s\" role "
  "\"%s\" prohibit mutate operation on "
@@ -693,8 +693,9 @@ ovsdb_execute_delete(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 dr.table = table;
 dr.txn = x->txn;
 
-if (ovsdb_rbac_delete(x->db, table, , x->role, x->id)) {
-ovsdb_query(table, , delete_row_cb, );
+if (ovsdb_rbac_delete(x->db, tab

<    2   3   4   5   6   7   8   >