[ovs-dev] [PATCH ovn] controller: Add random delay during fdb learning.

2024-06-25 Thread Naveen Yerramneni
This change reduces the probability of conflicts when
multiple nodes tries to add the same FDB entry to SB at the
same time. When conflict occurs, OVN controller does full
recompute which is heavy weight on the scale setup.

Signed-off-by: Naveen Yerramneni 
Suggested-by: Dumitru Ceara 
---
 controller/mac-cache.c  |  4 +++-
 controller/mac-cache.h  |  4 +++-
 controller/ovn-controller.c |  2 +-
 controller/pinctrl.c| 28 
 tests/ovn.at|  6 +++---
 5 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/controller/mac-cache.c b/controller/mac-cache.c
index d8c4e2aed..de45d7a6a 100644
--- a/controller/mac-cache.c
+++ b/controller/mac-cache.c
@@ -245,7 +245,8 @@ mac_binding_lookup(struct ovsdb_idl_index 
*sbrec_mac_binding_by_lport_ip,
 
 /* FDB. */
 struct fdb *
-fdb_add(struct hmap *map, struct fdb_data fdb_data) {
+fdb_add(struct hmap *map, struct fdb_data fdb_data, long long timestamp)
+{
 struct fdb *fdb = fdb_find(map, &fdb_data);
 
 if (!fdb) {
@@ -255,6 +256,7 @@ fdb_add(struct hmap *map, struct fdb_data fdb_data) {
 }
 
 fdb->data = fdb_data;
+fdb->timestamp = timestamp;
 
 return fdb;
 }
diff --git a/controller/mac-cache.h b/controller/mac-cache.h
index 3c78f9440..b94e7a1cf 100644
--- a/controller/mac-cache.h
+++ b/controller/mac-cache.h
@@ -83,6 +83,7 @@ struct fdb {
 struct fdb_data data;
 /* Reference to the SB FDB record. */
 const struct sbrec_fdb *sbrec_fdb;
+long long timestamp;
 };
 
 struct bp_packet_data {
@@ -149,7 +150,8 @@ mac_binding_lookup(struct ovsdb_idl_index 
*sbrec_mac_binding_by_lport_ip,
const char *logical_port, const char *ip);
 
 /* FDB. */
-struct fdb *fdb_add(struct hmap *map, struct fdb_data fdb_data);
+struct fdb *fdb_add(struct hmap *map, struct fdb_data fdb_data,
+long long timestamp);
 
 void fdb_remove(struct hmap *map, struct fdb *fdb);
 
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 6874f99a3..e99bdb3ef 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -3322,7 +3322,7 @@ fdb_add_sb(struct mac_cache_data *data, const struct 
sbrec_fdb *sfdb)
 return;
 }
 
-struct fdb *fdb = fdb_add(&data->fdbs, fdb_data);
+struct fdb *fdb = fdb_add(&data->fdbs, fdb_data, 0);
 
 fdb->sbrec_fdb = sfdb;
 }
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index f2e382a44..e3ded793b 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -4739,6 +4739,7 @@ pinctrl_destroy(void)
 /* Buffered "put_mac_binding" operation. */
 
 #define MAX_MAC_BINDING_DELAY_MSEC 50
+#define MAX_FDB_DELAY_MSEC 50
 #define MAX_MAC_BINDINGS   1000
 
 /* Contains "struct mac_binding"s. */
@@ -8961,21 +8962,30 @@ run_put_fdbs(struct ovsdb_idl_txn *ovnsb_idl_txn,
 return;
 }
 
+long long now = time_msec();
 const struct fdb *fdb;
-HMAP_FOR_EACH (fdb, hmap_node, &put_fdbs) {
-run_put_fdb(ovnsb_idl_txn, sbrec_fdb_by_dp_key_mac,
-sbrec_port_binding_by_key,
-sbrec_datapath_binding_by_key, fdb);
+HMAP_FOR_EACH_SAFE (fdb, hmap_node, &put_fdbs) {
+if (now >= fdb->timestamp) {
+run_put_fdb(ovnsb_idl_txn, sbrec_fdb_by_dp_key_mac,
+sbrec_port_binding_by_key,
+sbrec_datapath_binding_by_key, fdb);
+fdb_remove(&put_fdbs, fdb);
+}
 }
-fdbs_clear(&put_fdbs);
 }
 
 
 static void
 wait_put_fdbs(struct ovsdb_idl_txn *ovnsb_idl_txn)
+OVS_REQUIRES(pinctrl_mutex)
 {
-if (ovnsb_idl_txn && !hmap_is_empty(&put_fdbs)) {
-poll_immediate_wake();
+if (!ovnsb_idl_txn) {
+return;
+}
+
+struct fdb *fdb;
+HMAP_FOR_EACH (fdb, hmap_node, &put_fdbs) {
+poll_timer_wait_until(fdb->timestamp);
 }
 }
 
@@ -8995,6 +9005,8 @@ pinctrl_handle_put_fdb(const struct flow *md, const 
struct flow *headers)
 .mac = headers->dl_src,
 };
 
-fdb_add(&put_fdbs, fdb_data);
+uint32_t delay = random_range(MAX_FDB_DELAY_MSEC) + 1;
+long long timestamp = time_msec() + delay;
+fdb_add(&put_fdbs, fdb_data, timestamp);
 notify_pinctrl_main();
 }
diff --git a/tests/ovn.at b/tests/ovn.at
index af0eaeed3..7b199d3f5 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -34382,21 +34382,21 @@ AT_CHECK([ovn-nbctl --wait=hv sync])
 # Learning is disabled, the table should be empty
 send_packet 20
 AT_CHECK([ovn-nbctl --wait=hv sync])
-AT_CHECK([test $(ovn-sbctl list fdb | grep -c "00:00:00:00:10:20") = 0])
+OVS_WAIT_UNTIL([test $(ovn-sbctl list fdb | grep -c "00:00:00:00:10:20") = 0])
 
 # Enable learning on localnet port
 AT_CHECK([ovn-nbctl set logical_switch_port ln_port 
options:localnet_learn_fdb=true])
 AT_CHECK([ovn-nbctl --wait=hv sync])
 send_packet 20
 AT_CHECK([ovn-nbctl --wait=hv sync])
-AT_CHECK([test $(ovn-sbctl list fdb | grep -c "00:00:00:00:10:20") = 1])
+OVS_WAI

Re: [ovs-dev] [PATCH ovn v4 4/4] Controller, northd: Add support for CT zone limits.

2024-06-25 Thread 0-day Robot
Bleep bloop.  Greetings Ales Musil, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 84 characters long (recommended limit is 79)
#502 FILE: ovn-nb.xml:726:
  type='{"type": "integer", "minInteger": 0, "maxInteger": 
4294967295}'>

WARNING: Line is 86 characters long (recommended limit is 79)
#519 FILE: ovn-nb.xml:1137:
type='{"type": "integer", "minInteger": 0, "maxInteger": 
4294967295}'>

WARNING: Line is 84 characters long (recommended limit is 79)
#537 FILE: ovn-nb.xml:2811:
  type='{"type": "integer", "minInteger": 0, "maxInteger": 
4294967295}'>

Lines checked: 656, Warnings: 3, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v4 1/4] controller: Move CT zone handling into separate module.

2024-06-25 Thread 0-day Robot
Bleep bloop.  Greetings Ales Musil, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Comment with 'xxx' marker
#191 FILE: controller/ct-zone.c:146:
/* XXX Add method to limit zone assignment to logical router

WARNING: Comment with 'xxx' marker
#269 FILE: controller/ct-zone.c:224:
/* xxx This is wasteful to assign a zone to each port--even if no

WARNING: Comment with 'xxx' marker
#270 FILE: controller/ct-zone.c:225:
 * xxx security policy is applied. */

Lines checked: 1112, Warnings: 3, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v4 4/4] Controller, northd: Add support for CT zone limits.

2024-06-25 Thread Ales Musil
Add support for limiting the CT zone usage per Ls, LR or LSP.
When the limit is configured on logical switch it will also implicitly
set limits for all ports in that logical switch. The port configuration
can be overwritten individually and has priority over the whole logical
switch configuration.

The value 0 means unlimited, when the value is not specified it is
derived from OvS default CT limit specified for given OvS datapath.

Reported-at: https://bugzilla.redhat.com/2189924
Signed-off-by: Ales Musil 
---
v4: Rebase on top of latest main.
Change naming of the ct_zone_limit_sync to avoid potential confusion as 
suggested by Lorenzo.
v3: Rebase on top of latest main.
---
 NEWS|   3 +
 controller/ct-zone.c| 175 
 controller/ct-zone.h|  12 ++-
 controller/ovn-controller.c |  25 +-
 lib/ovn-util.c  |  17 
 lib/ovn-util.h  |   3 +
 northd/northd.c |   8 ++
 ovn-nb.xml  |  29 ++
 tests/ovn-controller.at |  99 
 9 files changed, 349 insertions(+), 22 deletions(-)

diff --git a/NEWS b/NEWS
index 01db77d57..f50d68a82 100644
--- a/NEWS
+++ b/NEWS
@@ -34,6 +34,9 @@ Post v24.03.0
   - NATs can now be given an arbitrary match condition and priority. This
 allows for conditional NATs to be configured. See the ovn-nb(5) man
 page for more information.
+  - Add support for CT zone limit that can be specified per LR
+(options:ct-zone-limit), LS (other_config:ct-zone-limit) or LSP
+(options:ct-zone-limit).
 
 OVN v24.03.0 - 01 Mar 2024
 --
diff --git a/controller/ct-zone.c b/controller/ct-zone.c
index 6ed1e4108..97f0de6c3 100644
--- a/controller/ct-zone.c
+++ b/controller/ct-zone.c
@@ -34,6 +34,18 @@ static bool ct_zone_assign_unused(struct ct_zone_ctx *ctx,
 static bool ct_zone_remove(struct ct_zone_ctx *ctx, const char *name);
 static void ct_zone_add(struct ct_zone_ctx *ctx, const char *name,
 uint16_t zone, bool set_pending);
+static void
+ct_zone_limits_update_per_dp(struct ct_zone_ctx *ctx,
+ const struct sbrec_datapath_binding *dp,
+ const char *name,
+ struct ovsdb_idl_index *pb_by_dp);
+static void ct_zone_limit_update(struct ct_zone_ctx *ctx, const char *name,
+ int64_t limit);
+static int64_t ct_zone_get_dp_limit(const struct sbrec_datapath_binding *dp);
+static int64_t ct_zone_get_pb_limit(const struct sbrec_port_binding *pb);
+static int64_t ct_zone_limit_normalize(int64_t limit);
+static struct ovsrec_ct_zone *
+ct_zone_find_ovsrec(const struct ovsrec_datapath *dp, uint16_t zone_id);
 
 void
 ct_zones_restore(struct ct_zone_ctx *ctx,
@@ -196,11 +208,14 @@ ct_zones_update(const struct sset *local_lports,
 
 void
 ct_zones_commit(const struct ovsrec_bridge *br_int,
+const struct ovsrec_datapath *ovs_dp,
+struct ovsdb_idl_txn *ovs_idl_txn,
 struct shash *pending_ct_zones)
 {
 struct shash_node *iter;
 SHASH_FOR_EACH (iter, pending_ct_zones) {
 struct ct_zone_pending_entry *ctzpe = iter->data;
+struct ct_zone *ct_zone = &ctzpe->ct_zone;
 
 /* The transaction is open, so any pending entries in the
  * CT_ZONE_DB_QUEUED must be sent and any in CT_ZONE_DB_QUEUED
@@ -212,7 +227,7 @@ ct_zones_commit(const struct ovsrec_bridge *br_int,
 
 char *user_str = xasprintf("ct-zone-%s", iter->name);
 if (ctzpe->add) {
-char *zone_str = xasprintf("%"PRIu16, ctzpe->ct_zone.zone);
+char *zone_str = xasprintf("%"PRIu16, ct_zone->zone);
 struct smap_node *node =
 smap_get_node(&br_int->external_ids, user_str);
 if (!node || strcmp(node->value, zone_str)) {
@@ -227,6 +242,19 @@ ct_zones_commit(const struct ovsrec_bridge *br_int,
 }
 free(user_str);
 
+struct ovsrec_ct_zone *ovs_zone =
+ct_zone_find_ovsrec(ovs_dp, ct_zone->zone);
+if ((!ctzpe->add || ct_zone->limit < 0) && ovs_zone) {
+ovsrec_datapath_update_ct_zones_delkey(ovs_dp, ct_zone->zone);
+} else if (ctzpe->add && ct_zone->limit >= 0) {
+if (!ovs_zone) {
+ovs_zone = ovsrec_ct_zone_insert(ovs_idl_txn);
+ovsrec_datapath_update_ct_zones_setkey(ovs_dp, ct_zone->zone,
+   ovs_zone);
+}
+ovsrec_ct_zone_set_limit(ovs_zone, &ct_zone->limit, 1);
+}
+
 ctzpe->state = CT_ZONE_DB_SENT;
 }
 }
@@ -247,8 +275,19 @@ ct_zones_pending_clear_commited(struct shash *pending)
 /* Returns "true" when there is no need for full recompute. */
 bool
 ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
- const struct sbrec_datapath_binding *dp)
+  

[ovs-dev] [PATCH ovn v4 3/4] controller: Prepare structure around CT zone limiting.

2024-06-25 Thread Ales Musil
In order to be able to store CT limits for specified zone, store the
zone inside separate struct instead of simap. This allows to add
the addition of limit without changing the whole infrastructure again.

This is a preparation step for the CT zone limits.

Signed-off-by: Ales Musil 
---
v4: Rebase on top of latest main.
Address comments from Lorenzo.
v3: Rebase on top of latest main.
v2: Fix NULL ptr deref.
---
 controller/ct-zone.c| 169 +---
 controller/ct-zone.h|  13 ++-
 controller/ofctrl.c |   2 +-
 controller/ovn-controller.c |  15 ++--
 controller/physical.c   |  17 ++--
 controller/physical.h   |   2 +-
 6 files changed, 126 insertions(+), 92 deletions(-)

diff --git a/controller/ct-zone.c b/controller/ct-zone.c
index e4f66a52a..6ed1e4108 100644
--- a/controller/ct-zone.c
+++ b/controller/ct-zone.c
@@ -26,12 +26,14 @@ ct_zone_restore(const struct sbrec_datapath_binding_table 
*dp_table,
 struct ct_zone_ctx *ctx, const char *name, int zone);
 static void ct_zone_add_pending(struct shash *pending_ct_zones,
 enum ct_zone_pending_state state,
-int zone, bool add, const char *name);
+struct ct_zone *zone, bool add,
+const char *name);
 static int ct_zone_get_snat(const struct sbrec_datapath_binding *dp);
 static bool ct_zone_assign_unused(struct ct_zone_ctx *ctx,
   const char *zone_name, int *scan_start);
-static bool ct_zone_remove(struct ct_zone_ctx *ctx,
-   struct simap_node *ct_zone);
+static bool ct_zone_remove(struct ct_zone_ctx *ctx, const char *name);
+static void ct_zone_add(struct ct_zone_ctx *ctx, const char *name,
+uint16_t zone, bool set_pending);
 
 void
 ct_zones_restore(struct ct_zone_ctx *ctx,
@@ -47,7 +49,8 @@ ct_zones_restore(struct ct_zone_ctx *ctx,
 struct ct_zone_pending_entry *ctpe = pending_node->data;
 
 if (ctpe->add) {
-ct_zone_restore(dp_table, ctx, pending_node->name, ctpe->zone);
+ct_zone_restore(dp_table, ctx, pending_node->name,
+ctpe->ct_zone.zone);
 }
 }
 
@@ -91,7 +94,6 @@ void
 ct_zones_update(const struct sset *local_lports,
 const struct hmap *local_datapaths, struct ct_zone_ctx *ctx)
 {
-struct simap_node *ct_zone;
 int scan_start = 1;
 const char *user;
 struct sset all_users = SSET_INITIALIZER(&all_users);
@@ -132,12 +134,14 @@ ct_zones_update(const struct sset *local_lports,
 }
 
 /* Delete zones that do not exist in above sset. */
-SIMAP_FOR_EACH_SAFE (ct_zone, &ctx->current) {
-if (!sset_contains(&all_users, ct_zone->name)) {
-ct_zone_remove(ctx, ct_zone);
-} else if (!simap_find(&req_snat_zones, ct_zone->name)) {
-bitmap_set1(unreq_snat_zones_map, ct_zone->data);
-simap_put(&unreq_snat_zones, ct_zone->name, ct_zone->data);
+struct shash_node *node;
+SHASH_FOR_EACH_SAFE (node, &ctx->current) {
+struct ct_zone *ct_zone = node->data;
+if (!sset_contains(&all_users, node->name)) {
+ct_zone_remove(ctx, node->name);
+} else if (!simap_find(&req_snat_zones, node->name)) {
+bitmap_set1(unreq_snat_zones_map, ct_zone->zone);
+simap_put(&unreq_snat_zones, node->name, ct_zone->zone);
 }
 }
 
@@ -152,7 +156,7 @@ ct_zones_update(const struct sset *local_lports,
 struct simap_node *unreq_node;
 SIMAP_FOR_EACH_SAFE (unreq_node, &unreq_snat_zones) {
 if (unreq_node->data == snat_req_node->data) {
-simap_find_and_delete(&ctx->current, unreq_node->name);
+ct_zone_remove(ctx, unreq_node->name);
 simap_delete(&unreq_snat_zones, unreq_node);
 }
 }
@@ -163,26 +167,12 @@ ct_zones_update(const struct sset *local_lports,
 bitmap_set0(unreq_snat_zones_map, snat_req_node->data);
 }
 
-struct simap_node *node = simap_find(&ctx->current,
- snat_req_node->name);
-if (node) {
-if (node->data != snat_req_node->data) {
-/* Zone request has changed for this node. delete old entry and
- * create new one*/
-ct_zone_add_pending(&ctx->pending, CT_ZONE_OF_QUEUED,
-snat_req_node->data, true,
-snat_req_node->name);
-bitmap_set0(ctx->bitmap, node->data);
-}
-bitmap_set1(ctx->bitmap, snat_req_node->data);
-node->data = snat_req_node->data;
-} else {
-ct_zone_add_pending(&ctx->pending, CT_ZONE_OF_QUEUED,
-snat_req_node->data, tr

[ovs-dev] [PATCH ovn v4 2/4] controller: Further encapsulate the CT zone handling.

2024-06-25 Thread Ales Musil
Move more code into the new ct-zone module and encapsulate
functionality that is strictly related to CT zone handling.

Signed-off-by: Ales Musil 
Acked-by: Lorenzo Bianconi 
---
v4: Rebase on top of latest main.
    Added ack from Lorenzo and fixed one nit.
v3: Rebase on top of latest main.
---
 controller/ct-zone.c| 156 +---
 controller/ct-zone.h|   8 +-
 controller/ovn-controller.c |  50 ++--
 3 files changed, 119 insertions(+), 95 deletions(-)

diff --git a/controller/ct-zone.c b/controller/ct-zone.c
index 3e37fedb6..e4f66a52a 100644
--- a/controller/ct-zone.c
+++ b/controller/ct-zone.c
@@ -27,6 +27,11 @@ ct_zone_restore(const struct sbrec_datapath_binding_table 
*dp_table,
 static void ct_zone_add_pending(struct shash *pending_ct_zones,
 enum ct_zone_pending_state state,
 int zone, bool add, const char *name);
+static int ct_zone_get_snat(const struct sbrec_datapath_binding *dp);
+static bool ct_zone_assign_unused(struct ct_zone_ctx *ctx,
+  const char *zone_name, int *scan_start);
+static bool ct_zone_remove(struct ct_zone_ctx *ctx,
+   struct simap_node *ct_zone);
 
 void
 ct_zones_restore(struct ct_zone_ctx *ctx,
@@ -82,47 +87,6 @@ ct_zones_restore(struct ct_zone_ctx *ctx,
 }
 }
 
-bool
-ct_zone_assign_unused(struct ct_zone_ctx *ctx, const char *zone_name,
-  int *scan_start)
-{
-/* We assume that there are 64K zones and that we own them all. */
-int zone = bitmap_scan(ctx->bitmap, 0, *scan_start, MAX_CT_ZONES + 1);
-if (zone == MAX_CT_ZONES + 1) {
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-VLOG_WARN_RL(&rl, "exhausted all ct zones");
-return false;
-}
-
-*scan_start = zone + 1;
-
-ct_zone_add_pending(&ctx->pending, CT_ZONE_OF_QUEUED,
-zone, true, zone_name);
-
-bitmap_set1(ctx->bitmap, zone);
-simap_put(&ctx->current, zone_name, zone);
-return true;
-}
-
-bool
-ct_zone_remove(struct ct_zone_ctx *ctx, const char *name)
-{
-struct simap_node *ct_zone = simap_find(&ctx->current, name);
-if (!ct_zone) {
-return false;
-}
-
-VLOG_DBG("removing ct zone %"PRId32" for '%s'", ct_zone->data,
- ct_zone->name);
-
-ct_zone_add_pending(&ctx->pending, CT_ZONE_OF_QUEUED,
-ct_zone->data, false, ct_zone->name);
-bitmap_set0(ctx->bitmap, ct_zone->data);
-simap_delete(&ctx->current, ct_zone);
-
-return true;
-}
-
 void
 ct_zones_update(const struct sset *local_lports,
 const struct hmap *local_datapaths, struct ct_zone_ctx *ctx)
@@ -170,7 +134,7 @@ ct_zones_update(const struct sset *local_lports,
 /* Delete zones that do not exist in above sset. */
 SIMAP_FOR_EACH_SAFE (ct_zone, &ctx->current) {
 if (!sset_contains(&all_users, ct_zone->name)) {
-ct_zone_remove(ctx, ct_zone->name);
+ct_zone_remove(ctx, ct_zone);
 } else if (!simap_find(&req_snat_zones, ct_zone->name)) {
 bitmap_set1(unreq_snat_zones_map, ct_zone->data);
 simap_put(&unreq_snat_zones, ct_zone->name, ct_zone->data);
@@ -277,12 +241,6 @@ ct_zones_commit(const struct ovsrec_bridge *br_int,
 }
 }
 
-int
-ct_zone_get_snat(const struct sbrec_datapath_binding *dp)
-{
-return smap_get_int(&dp->external_ids, "snat-ct-zone", -1);
-}
-
 void
 ct_zones_pending_clear_commited(struct shash *pending)
 {
@@ -296,6 +254,108 @@ ct_zones_pending_clear_commited(struct shash *pending)
 }
 }
 
+/* Returns "true" when there is no need for full recompute. */
+bool
+ct_zone_handle_dp_update(struct ct_zone_ctx *ctx,
+ const struct sbrec_datapath_binding *dp)
+{
+int req_snat_zone = ct_zone_get_snat(dp);
+if (req_snat_zone == -1) {
+/* datapath snat ct zone is not set.  This condition will also hit
+ * when CMS clears the snat-ct-zone for the logical router.
+ * In this case there is no harm in using the previosly specified
+ * snat ct zone for this datapath.  Also it is hard to know
+ * if this option was cleared or if this option is never set. */
+return true;
+}
+
+const char *name = smap_get(&dp->external_ids, "name");
+if (!name) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' skipping"
+"zone check.", UUID_ARGS(&dp->header_.uuid));
+return true;
+}
+
+/* Check if the requested snat zone has changed for the datapath
+ * or not.  If so, then fall back to full recompute of
+ * ct_zone engine. */
+char *snat_dp_zone_key = alloc_nat_zone_key(name, "snat");
+struct simap_node *simap_node =
+simap_find(&ctx->current, snat_dp_zone_key);
+free(snat_dp_zone_key);

[ovs-dev] [PATCH ovn v4 1/4] controller: Move CT zone handling into separate module.

2024-06-25 Thread Ales Musil
Move the CT zone handling specific bits into its own module. This
allows for easier changes done within the module and separates the
logic that is unrelated from ovn-controller.

Signed-off-by: Ales Musil 
Acked-by: Lorenzo Bianconi 
---
v4: Rebase on top of latest main.
Added ack from Lorenzo.
v3: Rebase on top of latest main.
---
 controller/automake.mk  |   4 +-
 controller/ct-zone.c| 378 ++
 controller/ct-zone.h|  74 +++
 controller/ofctrl.c |   3 +-
 controller/ovn-controller.c | 393 +++-
 controller/ovn-controller.h |  21 +-
 controller/pinctrl.c|   2 +-
 tests/ovn.at|   4 +-
 8 files changed, 486 insertions(+), 393 deletions(-)
 create mode 100644 controller/ct-zone.c
 create mode 100644 controller/ct-zone.h

diff --git a/controller/automake.mk b/controller/automake.mk
index 1b1b3aeb1..ed93cfb3c 100644
--- a/controller/automake.mk
+++ b/controller/automake.mk
@@ -47,7 +47,9 @@ controller_ovn_controller_SOURCES = \
controller/mac-cache.h \
controller/mac-cache.c \
controller/statctrl.h \
-   controller/statctrl.c
+   controller/statctrl.c \
+   controller/ct-zone.h \
+   controller/ct-zone.c
 
 controller_ovn_controller_LDADD = lib/libovn.la $(OVS_LIBDIR)/libopenvswitch.la
 man_MANS += controller/ovn-controller.8
diff --git a/controller/ct-zone.c b/controller/ct-zone.c
new file mode 100644
index 0..3e37fedb6
--- /dev/null
+++ b/controller/ct-zone.c
@@ -0,0 +1,378 @@
+/* Copyright (c) 2024, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+
+#include "ct-zone.h"
+#include "local_data.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(ct_zone);
+
+static void
+ct_zone_restore(const struct sbrec_datapath_binding_table *dp_table,
+struct ct_zone_ctx *ctx, const char *name, int zone);
+static void ct_zone_add_pending(struct shash *pending_ct_zones,
+enum ct_zone_pending_state state,
+int zone, bool add, const char *name);
+
+void
+ct_zones_restore(struct ct_zone_ctx *ctx,
+ const struct ovsrec_open_vswitch_table *ovs_table,
+ const struct sbrec_datapath_binding_table *dp_table,
+ const struct ovsrec_bridge *br_int)
+{
+memset(ctx->bitmap, 0, sizeof ctx->bitmap);
+bitmap_set1(ctx->bitmap, 0); /* Zone 0 is reserved. */
+
+struct shash_node *pending_node;
+SHASH_FOR_EACH (pending_node, &ctx->pending) {
+struct ct_zone_pending_entry *ctpe = pending_node->data;
+
+if (ctpe->add) {
+ct_zone_restore(dp_table, ctx, pending_node->name, ctpe->zone);
+}
+}
+
+const struct ovsrec_open_vswitch *cfg;
+cfg = ovsrec_open_vswitch_table_first(ovs_table);
+if (!cfg) {
+return;
+}
+
+if (!br_int) {
+/* If the integration bridge hasn't been defined, assume that
+ * any existing ct-zone definitions aren't valid. */
+return;
+}
+
+struct smap_node *node;
+SMAP_FOR_EACH (node, &br_int->external_ids) {
+if (strncmp(node->key, "ct-zone-", 8)) {
+continue;
+}
+
+const char *user = node->key + 8;
+if (!user[0]) {
+continue;
+}
+
+if (shash_find(&ctx->pending, user)) {
+continue;
+}
+
+unsigned int zone;
+if (!str_to_uint(node->value, 10, &zone)) {
+continue;
+}
+
+ct_zone_restore(dp_table, ctx, user, zone);
+}
+}
+
+bool
+ct_zone_assign_unused(struct ct_zone_ctx *ctx, const char *zone_name,
+  int *scan_start)
+{
+/* We assume that there are 64K zones and that we own them all. */
+int zone = bitmap_scan(ctx->bitmap, 0, *scan_start, MAX_CT_ZONES + 1);
+if (zone == MAX_CT_ZONES + 1) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+VLOG_WARN_RL(&rl, "exhausted all ct zones");
+return false;
+}
+
+*scan_start = zone + 1;
+
+ct_zone_add_pending(&ctx->pending, CT_ZONE_OF_QUEUED,
+zone, true, zone_name);
+
+bitmap_set1(ctx->bitmap, zone);
+simap_put(&ctx->current, zone_name, zone);
+return true;
+}
+
+bool
+ct_zone_remove(struct ct_zone_ctx *ctx, const char *name)
+{
+struct simap_node *ct_zone = simap_find(&ctx->

[ovs-dev] [PATCH ovn v4 0/4] Add ability to limit CT entries per LS/LR/LSP

2024-06-25 Thread Ales Musil
Add ability that allows to set CT limits per logical switch, logical
router or logical switch port. When the limit is applied to logical
switch it will be implicitly set for all logical ports in the logical
switch. This can be overwritten individually per port.

To achieve this there is a small refactor of the CT zone handling logic
which allows us to get the zone limiting more easily.

Ales Musil (4):
  controller: Move CT zone handling into separate module.
  controller: Further encapsulate the CT zone handling.
  controller: Prepare structure around CT zone limiting.
  Controller, northd: Add support for CT zone limits.

 NEWS|   3 +
 controller/automake.mk  |   4 +-
 controller/ct-zone.c| 606 
 controller/ct-zone.h|  89 ++
 controller/ofctrl.c |   5 +-
 controller/ovn-controller.c | 453 +++
 controller/ovn-controller.h |  21 +-
 controller/physical.c   |  17 +-
 controller/physical.h   |   2 +-
 controller/pinctrl.c|   2 +-
 lib/ovn-util.c  |  17 +
 lib/ovn-util.h  |   3 +
 northd/northd.c |   8 +
 ovn-nb.xml  |  29 ++
 tests/ovn-controller.at |  99 ++
 tests/ovn.at|   4 +-
 16 files changed, 920 insertions(+), 442 deletions(-)
 create mode 100644 controller/ct-zone.c
 create mode 100644 controller/ct-zone.h

-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [RFC v2 5/9] ofproto-dpif-xlate: Use emit_sample for local sample.

2024-06-25 Thread Adrián Moreno
On Tue, Jun 25, 2024 at 11:04:42PM GMT, Ilya Maximets wrote:
> On 6/25/24 22:53, Adrián Moreno wrote:
> > On Mon, Jun 24, 2024 at 07:43:32PM GMT, Adrián Moreno wrote:
> >> On Mon, Jun 24, 2024 at 04:05:36PM GMT, Ilya Maximets wrote:
> >>> On 6/24/24 15:19, Adrián Moreno wrote:
>  On Mon, Jun 24, 2024 at 01:37:44PM GMT, Ilya Maximets wrote:
> > On 6/5/24 22:23, Adrian Moreno wrote:
> >> Use the newly added emit_sample to implement OpenFlow sample() actions
> >> with local sampling configuration.
> >>
> >> Signed-off-by: Adrian Moreno 
> >> ---
> >>  ofproto/ofproto-dpif-lsample.c |  17 
> >>  ofproto/ofproto-dpif-lsample.h |   6 ++
> >>  ofproto/ofproto-dpif-xlate.c   | 163 -
> >>  ofproto/ofproto-dpif-xlate.h   |   3 +-
> >>  ofproto/ofproto-dpif.c |   2 +-
> >>  5 files changed, 144 insertions(+), 47 deletions(-)
> >
> > Not a full review, just a note that this patch needs tests in 
> > ofproto-dpif.at
> > that would check that we generate correct datapath flows.
> >
> 
>  I thought about that, but ofproto-dpif.at is based on dummy datapat
>  (userspace) which does not support this action. The configuration will
>  be ignored and the datapath actions will not be generated. That's why I
>  relied on system-traffic.at tests. Do you see any way around this?
> >>>
> >>> We don't need actual datapath support if we're not sending any actual
> >>> trafic.  You should be able to just turn on the capability and check
> >>> that ofproto/trace generates correct actions.
> >>>
> >>> We test kernel tunnels this way, for example.  See the macro
> >>> OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP.  And some other similar checks.
> >>>
> >>
> >
> > Tried it but it doesn't seem to work. I now remember I hit this issue
> > originally :-).
> > We cannot enable a capability beyond the datapath's "boot time"
> > capabilities.
> >
> > If you try to run
> > "ovs-appctl dpif/set-dp-features br0 emit_sample true", the command
> > fails with:
> > "Can not enable features not supported by the datapth"
> >
> > The safeguard does make sense in general (prevents users from
> > knee-capping themselves) but for testing maybe we want to force it?
> >
> > OTOH, marking this feature as supported in the userspace datapath would
> > not be catastrophic, we could just warn the action is not supported
> > in odp_execute(), or consider VLOG_DBG_RL as current implementation
> > until we come up with a good one. However, I cannot of a good way of
> > of properly reporting this to the user beforehand so I think it could
> > still cause a lot of confusion.
> >
> > Thoughts?
>
> Hmm.  This command is generally for testing only or experiments.
> It's not documented and hence we do not support changing datapath
> features in runtime.

Oh! It's true that it's docummented, although it does appear in
"list-commands". I thought we supported disabling features, I guess we
might on a case-by-case basis and through OVSDB.

>
> It should be fine to add another command like force-dp-features to
> overcome the restriction.  Just make sure that it is not documented
> and doesn't appear in the list-commands output.
>

If "set-dp-features" is already an exceptional path, can't we just add a
"--force" option to it?

Thanks.
Adrián

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [RFC v2 5/9] ofproto-dpif-xlate: Use emit_sample for local sample.

2024-06-25 Thread Ilya Maximets
On 6/25/24 22:53, Adrián Moreno wrote:
> On Mon, Jun 24, 2024 at 07:43:32PM GMT, Adrián Moreno wrote:
>> On Mon, Jun 24, 2024 at 04:05:36PM GMT, Ilya Maximets wrote:
>>> On 6/24/24 15:19, Adrián Moreno wrote:
 On Mon, Jun 24, 2024 at 01:37:44PM GMT, Ilya Maximets wrote:
> On 6/5/24 22:23, Adrian Moreno wrote:
>> Use the newly added emit_sample to implement OpenFlow sample() actions
>> with local sampling configuration.
>>
>> Signed-off-by: Adrian Moreno 
>> ---
>>  ofproto/ofproto-dpif-lsample.c |  17 
>>  ofproto/ofproto-dpif-lsample.h |   6 ++
>>  ofproto/ofproto-dpif-xlate.c   | 163 -
>>  ofproto/ofproto-dpif-xlate.h   |   3 +-
>>  ofproto/ofproto-dpif.c |   2 +-
>>  5 files changed, 144 insertions(+), 47 deletions(-)
>
> Not a full review, just a note that this patch needs tests in 
> ofproto-dpif.at
> that would check that we generate correct datapath flows.
>

 I thought about that, but ofproto-dpif.at is based on dummy datapat
 (userspace) which does not support this action. The configuration will
 be ignored and the datapath actions will not be generated. That's why I
 relied on system-traffic.at tests. Do you see any way around this?
>>>
>>> We don't need actual datapath support if we're not sending any actual
>>> trafic.  You should be able to just turn on the capability and check
>>> that ofproto/trace generates correct actions.
>>>
>>> We test kernel tunnels this way, for example.  See the macro
>>> OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP.  And some other similar checks.
>>>
>>
> 
> Tried it but it doesn't seem to work. I now remember I hit this issue
> originally :-).
> We cannot enable a capability beyond the datapath's "boot time"
> capabilities.
> 
> If you try to run
> "ovs-appctl dpif/set-dp-features br0 emit_sample true", the command
> fails with:
> "Can not enable features not supported by the datapth"
> 
> The safeguard does make sense in general (prevents users from
> knee-capping themselves) but for testing maybe we want to force it?
> 
> OTOH, marking this feature as supported in the userspace datapath would
> not be catastrophic, we could just warn the action is not supported
> in odp_execute(), or consider VLOG_DBG_RL as current implementation
> until we come up with a good one. However, I cannot of a good way of
> of properly reporting this to the user beforehand so I think it could
> still cause a lot of confusion.
> 
> Thoughts?

Hmm.  This command is generally for testing only or experiments.
It's not documented and hence we do not support changing datapath
features in runtime.

It should be fine to add another command like force-dp-features to
overcome the restriction.  Just make sure that it is not documented
and doesn't appear in the list-commands output.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 09/10] selftests: openvswitch: parse trunc action

2024-06-25 Thread Adrian Moreno
The trunc action was supported decode-able but not parse-able. Add
support for parsing the action string.

Signed-off-by: Adrian Moreno 
---
 .../testing/selftests/net/openvswitch/ovs-dpctl.py  | 13 +
 1 file changed, 13 insertions(+)

diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py 
b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 071309289cff..a3c26ddac42f 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -817,6 +817,19 @@ class ovsactions(nla):
 self["attrs"].append(["OVS_ACTION_ATTR_USERSPACE", uact])
 parsed = True
 
+elif parse_starts_block(actstr, "trunc(", False):
+parencount += 1
+actstr, val = parse_extract_field(
+actstr,
+"trunc(",
+r"([0-9]+)",
+int,
+False,
+None,
+)
+self["attrs"].append(["OVS_ACTION_ATTR_TRUNC", val])
+parsed = True
+
 actstr = actstr[strspn(actstr, ", ") :]
 while parencount > 0:
 parencount -= 1
-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 10/10] selftests: openvswitch: add emit_sample test

2024-06-25 Thread Adrian Moreno
Add a test to verify sampling packets via psample works.

In order to do that, create a subcommand in ovs-dpctl.py to listen to
on the psample multicast group and print samples.

Signed-off-by: Adrian Moreno 
---
 .../selftests/net/openvswitch/openvswitch.sh  | 114 +-
 .../selftests/net/openvswitch/ovs-dpctl.py|  73 ++-
 2 files changed, 181 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh 
b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index 15bca0708717..aeb9bee772be 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -20,7 +20,8 @@ tests="
nat_related_v4  ip4-nat-related: ICMP related 
matches work with SNAT
netlink_checks  ovsnl: validate netlink attrs 
and settings
upcall_interfaces   ovs: test the upcall interfaces
-   drop_reason drop: test drop reasons are 
emitted"
+   drop_reason drop: test drop reasons are 
emitted
+   emit_sample emit_sample: Sampling packets 
with psample"
 
 info() {
 [ $VERBOSE = 0 ] || echo $*
@@ -102,12 +103,21 @@ ovs_netns_spawn_daemon() {
shift
netns=$1
shift
-   info "spawning cmd: $*"
-   ip netns exec $netns $*  >> $ovs_dir/stdout  2>> $ovs_dir/stderr &
+   if [ "$netns" == "_default" ]; then
+   $*  >> $ovs_dir/stdout  2>> $ovs_dir/stderr &
+   else
+   ip netns exec $netns $*  >> $ovs_dir/stdout  2>> 
$ovs_dir/stderr &
+   fi
pid=$!
ovs_sbx "$sbx" on_exit "kill -TERM $pid 2>/dev/null"
 }
 
+ovs_spawn_daemon() {
+   sbx=$1
+   shift
+   ovs_netns_spawn_daemon $sbx "_default" $*
+}
+
 ovs_add_netns_and_veths () {
info "Adding netns attached: sbx:$1 dp:$2 {$3, $4, $5}"
ovs_sbx "$1" ip netns add "$3" || return 1
@@ -170,6 +180,19 @@ ovs_drop_reason_count()
return `echo "$perf_output" | grep "$pattern" | wc -l`
 }
 
+ovs_test_flow_fails () {
+   ERR_MSG="Flow actions may not be safe on all matching packets"
+
+   PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
+   ovs_add_flow $@ &> /dev/null $@ && return 1
+   POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
+
+   if [ "$PRE_TEST" == "$POST_TEST" ]; then
+   return 1
+   fi
+   return 0
+}
+
 usage() {
echo
echo "$0 [OPTIONS] [TEST]..."
@@ -184,6 +207,91 @@ usage() {
exit 1
 }
 
+
+# emit_sample test
+# - use emit_sample to observe packets
+test_emit_sample() {
+   sbx_add "test_emit_sample" || return $?
+
+   # Add a datapath with per-vport dispatching.
+   ovs_add_dp "test_emit_sample" emit_sample -V 2:1 || return 1
+
+   info "create namespaces"
+   ovs_add_netns_and_veths "test_emit_sample" "emit_sample" \
+   client c0 c1 172.31.110.10/24 -u || return 1
+   ovs_add_netns_and_veths "test_emit_sample" "emit_sample" \
+   server s0 s1 172.31.110.20/24 -u || return 1
+
+   # Check if emit_sample actions can be configured.
+   ovs_add_flow "test_emit_sample" emit_sample \
+   'in_port(1),eth(),eth_type(0x0806),arp()' 'emit_sample(group=1)'
+   if [ $? == 1 ]; then
+   info "no support for emit_sample - skipping"
+   ovs_exit_sig
+   return $ksft_skip
+   fi
+
+   ovs_del_flows "test_emit_sample" emit_sample
+
+   # Allow ARP
+   ovs_add_flow "test_emit_sample" emit_sample \
+   'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
+   ovs_add_flow "test_emit_sample" emit_sample \
+   'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1
+
+   # Test action verification.
+   OLDIFS=$IFS
+   IFS='*'
+   min_key='in_port(1),eth(),eth_type(0x0800),ipv4()'
+   for testcase in \
+   "cookie to 
large"*"emit_sample(group=1,cookie=1615141312111009080706050403020100)" \
+   "no group with cookie"*"emit_sample(cookie=abcd)" \
+   "no group"*"sample()";
+   do
+   set -- $testcase;
+   ovs_test_flow_fails "test_emit_sample" emit_sample $min_key $2
+   if [ $? == 1 ]; then
+   info "failed - $1"
+   return 1
+   fi
+   done
+   IFS=$OLDIFS
+
+   # Sample first 14 bytes of all traffic.
+   ovs_add_flow "test_emit_sample" emit_sample \
+   
"in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=1),icmp()" \
+"trunc(14),emit_sample(group=1,cookie=c0ffee),2"
+
+   # Sample all traffic. In this case, use a sample() action with both
+   # emit_sample and an upcall emulating simultaneous local sampling and
+   # sFlow / IPFIX.
+   nlpid=$(grep -E "listening on upca

Re: [ovs-dev] [RFC v2 5/9] ofproto-dpif-xlate: Use emit_sample for local sample.

2024-06-25 Thread Adrián Moreno
On Mon, Jun 24, 2024 at 07:43:32PM GMT, Adrián Moreno wrote:
> On Mon, Jun 24, 2024 at 04:05:36PM GMT, Ilya Maximets wrote:
> > On 6/24/24 15:19, Adrián Moreno wrote:
> > > On Mon, Jun 24, 2024 at 01:37:44PM GMT, Ilya Maximets wrote:
> > >> On 6/5/24 22:23, Adrian Moreno wrote:
> > >>> Use the newly added emit_sample to implement OpenFlow sample() actions
> > >>> with local sampling configuration.
> > >>>
> > >>> Signed-off-by: Adrian Moreno 
> > >>> ---
> > >>>  ofproto/ofproto-dpif-lsample.c |  17 
> > >>>  ofproto/ofproto-dpif-lsample.h |   6 ++
> > >>>  ofproto/ofproto-dpif-xlate.c   | 163 -
> > >>>  ofproto/ofproto-dpif-xlate.h   |   3 +-
> > >>>  ofproto/ofproto-dpif.c |   2 +-
> > >>>  5 files changed, 144 insertions(+), 47 deletions(-)
> > >>
> > >> Not a full review, just a note that this patch needs tests in 
> > >> ofproto-dpif.at
> > >> that would check that we generate correct datapath flows.
> > >>
> > >
> > > I thought about that, but ofproto-dpif.at is based on dummy datapat
> > > (userspace) which does not support this action. The configuration will
> > > be ignored and the datapath actions will not be generated. That's why I
> > > relied on system-traffic.at tests. Do you see any way around this?
> >
> > We don't need actual datapath support if we're not sending any actual
> > trafic.  You should be able to just turn on the capability and check
> > that ofproto/trace generates correct actions.
> >
> > We test kernel tunnels this way, for example.  See the macro
> > OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP.  And some other similar checks.
> >
>

Tried it but it doesn't seem to work. I now remember I hit this issue
originally :-).
We cannot enable a capability beyond the datapath's "boot time"
capabilities.

If you try to run
"ovs-appctl dpif/set-dp-features br0 emit_sample true", the command
fails with:
"Can not enable features not supported by the datapth"

The safeguard does make sense in general (prevents users from
knee-capping themselves) but for testing maybe we want to force it?

OTOH, marking this feature as supported in the userspace datapath would
not be catastrophic, we could just warn the action is not supported
in odp_execute(), or consider VLOG_DBG_RL as current implementation
until we come up with a good one. However, I cannot of a good way of
of properly reporting this to the user beforehand so I think it could
still cause a lot of confusion.

Thoughts?

Thanks.
Adrián

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 07/10] selftests: openvswitch: add emit_sample action

2024-06-25 Thread Adrian Moreno
Add sample and emit_sample action support to ovs-dpctl.py.

Refactor common attribute parsing logic into an external function.

Signed-off-by: Adrian Moreno 
---
 .../selftests/net/openvswitch/ovs-dpctl.py| 162 +-
 1 file changed, 161 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py 
b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 9f8dec2f6539..ddf4d999317c 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -8,6 +8,7 @@ import argparse
 import errno
 import ipaddress
 import logging
+import math
 import multiprocessing
 import re
 import struct
@@ -58,6 +59,7 @@ OVS_FLOW_CMD_DEL = 2
 OVS_FLOW_CMD_GET = 3
 OVS_FLOW_CMD_SET = 4
 
+UINT32_MAX = 0x
 
 def macstr(mac):
 outstr = ":".join(["%02X" % i for i in mac])
@@ -267,6 +269,75 @@ def parse_extract_field(
 return str_skipped, data
 
 
+def parse_attrs(actstr, attr_desc):
+"""Parses the given action string and returns a list of netlink
+attributes based on a list of attribute descriptions.
+
+Each element in the attribute description list is a tuple such as:
+(name, attr_name, parse_func)
+where:
+name: is the string representing the attribute
+attr_name: is the name of the attribute as defined in the uAPI.
+parse_func: is a callable accepting a string and returning either
+a single object (the parsed attribute value) or a tuple of
+two values (the parsed attribute value and the remaining string)
+
+Returns a list of attributes and the remaining string.
+"""
+def parse_attr(actstr, key, func):
+actstr = actstr[len(key) :]
+
+if not func:
+return None, actstr
+
+delim = actstr[0]
+actstr = actstr[1:]
+
+if delim == "=":
+pos = strcspn(actstr, ",)")
+ret = func(actstr[:pos])
+else:
+ret = func(actstr)
+
+if isinstance(ret, tuple):
+(datum, actstr) = ret
+else:
+datum = ret
+actstr = actstr[strcspn(actstr, ",)"):]
+
+if delim == "(":
+if not actstr or actstr[0] != ")":
+raise ValueError("Action contains unbalanced parentheses")
+
+actstr = actstr[1:]
+
+actstr = actstr[strspn(actstr, ", ") :]
+
+return datum, actstr
+
+attrs = []
+attr_desc = list(attr_desc)
+while actstr and actstr[0] != ")" and attr_desc:
+found = False
+for i, (key, attr, func) in enumerate(attr_desc):
+if actstr.startswith(key):
+datum, actstr = parse_attr(actstr, key, func)
+attrs.append([attr, datum])
+found = True
+del attr_desc[i]
+
+if not found:
+raise ValueError("Unknown attribute: '%s'" % actstr)
+
+actstr = actstr[strspn(actstr, ", ") :]
+
+if actstr[0] != ")":
+raise ValueError("Action string contains extra garbage or has "
+ "unbalanced parenthesis: '%s'" % actstr)
+
+return attrs, actstr[1:]
+
+
 class ovs_dp_msg(genlmsg):
 # include the OVS version
 # We need a custom header rather than just being able to rely on
@@ -285,7 +356,7 @@ class ovsactions(nla):
 ("OVS_ACTION_ATTR_SET", "none"),
 ("OVS_ACTION_ATTR_PUSH_VLAN", "none"),
 ("OVS_ACTION_ATTR_POP_VLAN", "flag"),
-("OVS_ACTION_ATTR_SAMPLE", "none"),
+("OVS_ACTION_ATTR_SAMPLE", "sample"),
 ("OVS_ACTION_ATTR_RECIRC", "uint32"),
 ("OVS_ACTION_ATTR_HASH", "none"),
 ("OVS_ACTION_ATTR_PUSH_MPLS", "none"),
@@ -304,8 +375,85 @@ class ovsactions(nla):
 ("OVS_ACTION_ATTR_ADD_MPLS", "none"),
 ("OVS_ACTION_ATTR_DEC_TTL", "none"),
 ("OVS_ACTION_ATTR_DROP", "uint32"),
+("OVS_ACTION_ATTR_EMIT_SAMPLE", "emit_sample"),
 )
 
+class emit_sample(nla):
+nla_flags = NLA_F_NESTED
+
+nla_map = (
+("OVS_EMIT_SAMPLE_ATTR_UNSPEC", "none"),
+("OVS_EMIT_SAMPLE_ATTR_GROUP", "uint32"),
+("OVS_EMIT_SAMPLE_ATTR_COOKIE", "array(uint8)"),
+)
+
+def dpstr(self, more=False):
+args = "group=%d" % self.get_attr("OVS_EMIT_SAMPLE_ATTR_GROUP")
+
+cookie = self.get_attr("OVS_EMIT_SAMPLE_ATTR_COOKIE")
+if cookie:
+args += ",cookie(%s)" % \
+"".join(format(x, "02x") for x in cookie)
+
+return "emit_sample(%s)" % args
+
+def parse(self, actstr):
+desc = (
+("group", "OVS_EMIT_SAMPLE_ATTR_GROUP", int),
+("cookie", "OVS_EMIT_SAMPLE_ATTR_COOKIE",
+lambda x: list(bytearray.fromhex(x)))
+)
+
+attrs, actstr = parse_attrs(actstr, desc)
+
+for attr in attrs:
+self[

[ovs-dev] [PATCH net-next v5 08/10] selftests: openvswitch: add userspace parsing

2024-06-25 Thread Adrian Moreno
The userspace action lacks parsing support plus it contains a bug in the
name of one of its attributes.

This patch makes userspace action work.

Signed-off-by: Adrian Moreno 
---
 .../selftests/net/openvswitch/ovs-dpctl.py| 24 +--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py 
b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index ddf4d999317c..071309289cff 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -575,13 +575,27 @@ class ovsactions(nla):
 print_str += "userdata="
 for f in self.get_attr("OVS_USERSPACE_ATTR_USERDATA"):
 print_str += "%x." % f
-if self.get_attr("OVS_USERSPACE_ATTR_TUN_PORT") is not None:
+if self.get_attr("OVS_USERSPACE_ATTR_EGRESS_TUN_PORT") is not None:
 print_str += "egress_tun_port=%d" % self.get_attr(
-"OVS_USERSPACE_ATTR_TUN_PORT"
+"OVS_USERSPACE_ATTR_EGRESS_TUN_PORT"
 )
 print_str += ")"
 return print_str
 
+def parse(self, actstr):
+attrs_desc = (
+("pid", "OVS_USERSPACE_ATTR_PID", int),
+("userdata", "OVS_USERSPACE_ATTR_USERDATA",
+lambda x: list(bytearray.fromhex(x))),
+("egress_tun_port", "OVS_USERSPACE_ATTR_EGRESS_TUN_PORT", int)
+)
+
+attrs, actstr = parse_attrs(actstr, attrs_desc)
+for attr in attrs:
+self["attrs"].append(attr)
+
+return actstr
+
 def dpstr(self, more=False):
 print_str = ""
 
@@ -797,6 +811,12 @@ class ovsactions(nla):
 self["attrs"].append(["OVS_ACTION_ATTR_EMIT_SAMPLE", emitact])
 parsed = True
 
+elif parse_starts_block(actstr, "userspace(", False):
+uact = self.userspace()
+actstr = uact.parse(actstr[len("userspace(") : ])
+self["attrs"].append(["OVS_ACTION_ATTR_USERSPACE", uact])
+parsed = True
+
 actstr = actstr[strspn(actstr, ", ") :]
 while parencount > 0:
 parencount -= 1
-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 06/10] net: openvswitch: store sampling probability in cb.

2024-06-25 Thread Adrian Moreno
When a packet sample is observed, the sampling rate that was used is
important to estimate the real frequency of such event.

Store the probability of the parent sample action in the skb's cb area
and use it in emit_sample to pass it down to psample.

Signed-off-by: Adrian Moreno 
---
 include/uapi/linux/openvswitch.h |  3 ++-
 net/openvswitch/actions.c| 20 +---
 net/openvswitch/datapath.h   |  3 +++
 net/openvswitch/vport.c  |  1 +
 4 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 8cfa1b3f6b06..823f22be1fcc 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -649,7 +649,8 @@ enum ovs_flow_attr {
  * Actions are passed as nested attributes.
  *
  * Executes the specified actions with the given probability on a per-packet
- * basis.
+ * basis. Nested actions will be able to access the probability value of the
+ * parent @OVS_ACTION_ATTR_SAMPLE.
  */
 enum ovs_sample_attr {
OVS_SAMPLE_ATTR_UNSPEC,
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 1f555cbba312..85a2ff91379b 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1048,12 +1048,15 @@ static int sample(struct datapath *dp, struct sk_buff 
*skb,
struct nlattr *sample_arg;
int rem = nla_len(attr);
const struct sample_arg *arg;
+   u32 init_probability;
bool clone_flow_key;
+   int err;
 
/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
sample_arg = nla_data(attr);
arg = nla_data(sample_arg);
actions = nla_next(sample_arg, &rem);
+   init_probability = OVS_CB(skb)->probability;
 
if ((arg->probability != U32_MAX) &&
(!arg->probability || get_random_u32() > arg->probability)) {
@@ -1062,9 +1065,16 @@ static int sample(struct datapath *dp, struct sk_buff 
*skb,
return 0;
}
 
+   OVS_CB(skb)->probability = arg->probability;
+
clone_flow_key = !arg->exec;
-   return clone_execute(dp, skb, key, 0, actions, rem, last,
-clone_flow_key);
+   err = clone_execute(dp, skb, key, 0, actions, rem, last,
+   clone_flow_key);
+
+   if (!last)
+   OVS_CB(skb)->probability = init_probability;
+
+   return err;
 }
 
 /* When 'last' is true, clone() should always consume the 'skb'.
@@ -1312,6 +1322,7 @@ static void execute_emit_sample(struct datapath *dp, 
struct sk_buff *skb,
struct psample_group psample_group = {};
struct psample_metadata md = {};
const struct nlattr *a;
+   u32 rate;
int rem;
 
nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
@@ -1330,8 +1341,11 @@ static void execute_emit_sample(struct datapath *dp, 
struct sk_buff *skb,
psample_group.net = ovs_dp_get_net(dp);
md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex;
md.trunc_size = skb->len - OVS_CB(skb)->cutlen;
+   md.rate_as_probability = 1;
+
+   rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX;
 
-   psample_sample_packet(&psample_group, skb, 0, &md);
+   psample_sample_packet(&psample_group, skb, rate, &md);
 #endif
 }
 
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 0cd29971a907..9ca6231ea647 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -115,12 +115,15 @@ struct datapath {
  * fragmented.
  * @acts_origlen: The netlink size of the flow actions applied to this skb.
  * @cutlen: The number of bytes from the packet end to be removed.
+ * @probability: The sampling probability that was applied to this skb; 0 means
+ * no sampling has occurred; U32_MAX means 100% probability.
  */
 struct ovs_skb_cb {
struct vport*input_vport;
u16 mru;
u16 acts_origlen;
u32 cutlen;
+   u32 probability;
 };
 #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
 
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 972ae01a70f7..8732f6e51ae5 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -500,6 +500,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff 
*skb,
OVS_CB(skb)->input_vport = vport;
OVS_CB(skb)->mru = 0;
OVS_CB(skb)->cutlen = 0;
+   OVS_CB(skb)->probability = 0;
if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
u32 mark;
 
-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 05/10] net: openvswitch: add emit_sample action

2024-06-25 Thread Adrian Moreno
Add support for a new action: emit_sample.

This action accepts a u32 group id and a variable-length cookie and uses
the psample multicast group to make the packet available for
observability.

The maximum length of the user-defined cookie is set to 16, same as
tc_cookie, to discourage using cookies that will not be offloadable.

Signed-off-by: Adrian Moreno 
---
 Documentation/netlink/specs/ovs_flow.yaml | 17 +
 include/uapi/linux/openvswitch.h  | 28 ++
 net/openvswitch/Kconfig   |  1 +
 net/openvswitch/actions.c | 45 +++
 net/openvswitch/flow_netlink.c| 33 -
 5 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/Documentation/netlink/specs/ovs_flow.yaml 
b/Documentation/netlink/specs/ovs_flow.yaml
index 4fdfc6b5cae9..a7ab5593a24f 100644
--- a/Documentation/netlink/specs/ovs_flow.yaml
+++ b/Documentation/netlink/specs/ovs_flow.yaml
@@ -727,6 +727,12 @@ attribute-sets:
 name: dec-ttl
 type: nest
 nested-attributes: dec-ttl-attrs
+  -
+name: emit-sample
+type: nest
+nested-attributes: emit-sample-attrs
+doc: |
+  Sends a packet sample to psample for external observation.
   -
 name: tunnel-key-attrs
 enum-name: ovs-tunnel-key-attr
@@ -938,6 +944,17 @@ attribute-sets:
   -
 name: gbp
 type: u32
+  -
+name: emit-sample-attrs
+enum-name: ovs-emit-sample-attr
+name-prefix: ovs-emit-sample-attr-
+attributes:
+  -
+name: group
+type: u32
+  -
+name: cookie
+type: binary
 
 operations:
   name-prefix: ovs-flow-cmd-
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index efc82c318fa2..8cfa1b3f6b06 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -914,6 +914,31 @@ struct check_pkt_len_arg {
 };
 #endif
 
+#define OVS_EMIT_SAMPLE_COOKIE_MAX_SIZE 16
+/**
+ * enum ovs_emit_sample_attr - Attributes for %OVS_ACTION_ATTR_EMIT_SAMPLE
+ * action.
+ *
+ * @OVS_EMIT_SAMPLE_ATTR_GROUP: 32-bit number to identify the source of the
+ * sample.
+ * @OVS_EMIT_SAMPLE_ATTR_COOKIE: A variable-length binary cookie that contains
+ * user-defined metadata. The maximum length is OVS_EMIT_SAMPLE_COOKIE_MAX_SIZE
+ * bytes.
+ *
+ * Sends the packet to the psample multicast group with the specified group and
+ * cookie. It is possible to combine this action with the
+ * %OVS_ACTION_ATTR_TRUNC action to limit the size of the packet being emitted.
+ */
+enum ovs_emit_sample_attr {
+   OVS_EMIT_SAMPLE_ATTR_GROUP = 1, /* u32 number. */
+   OVS_EMIT_SAMPLE_ATTR_COOKIE,/* Optional, user specified cookie. */
+
+   /* private: */
+   __OVS_EMIT_SAMPLE_ATTR_MAX
+};
+
+#define OVS_EMIT_SAMPLE_ATTR_MAX (__OVS_EMIT_SAMPLE_ATTR_MAX - 1)
+
 /**
  * enum ovs_action_attr - Action types.
  *
@@ -966,6 +991,8 @@ struct check_pkt_len_arg {
  * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS
  * argument.
  * @OVS_ACTION_ATTR_DROP: Explicit drop action.
+ * @OVS_ACTION_ATTR_EMIT_SAMPLE: Send a sample of the packet to external
+ * observers via psample.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -1004,6 +1031,7 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */
OVS_ACTION_ATTR_DEC_TTL,  /* Nested OVS_DEC_TTL_ATTR_*. */
OVS_ACTION_ATTR_DROP, /* u32 error code. */
+   OVS_ACTION_ATTR_EMIT_SAMPLE,  /* Nested OVS_EMIT_SAMPLE_ATTR_*. */
 
__OVS_ACTION_ATTR_MAX,/* Nothing past this will be accepted
   * from userspace. */
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 29a7081858cd..2535f3f9f462 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -10,6 +10,7 @@ config OPENVSWITCH
   (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \
 (!NF_NAT || NF_NAT) && \
 (!NETFILTER_CONNCOUNT || 
NETFILTER_CONNCOUNT)))
+   depends on PSAMPLE || !PSAMPLE
select LIBCRC32C
select MPLS
select NET_MPLS_GSO
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 964225580824..1f555cbba312 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -24,6 +24,11 @@
 #include 
 #include 
 #include 
+
+#if IS_ENABLED(CONFIG_PSAMPLE)
+#include 
+#endif
+
 #include 
 
 #include "datapath.h"
@@ -1299,6 +1304,37 @@ static int execute_dec_ttl(struct sk_buff *skb, struct 
sw_flow_key *key)
return 0;
 }
 
+static void execute_emit_sample(struct datapath *dp, struct sk_buff *skb,
+   const struct sw_flow_key *key,
+   con

[ovs-dev] [PATCH net-next v5 04/10] net: psample: allow using rate as probability

2024-06-25 Thread Adrian Moreno
Although not explicitly documented in the psample module itself, the
definition of PSAMPLE_ATTR_SAMPLE_RATE seems inherited from act_sample.

Quoting tc-sample(8):
"RATE of 100 will lead to an average of one sampled packet out of every
100 observed."

With this semantics, the rates that we can express with an unsigned
32-bits number are very unevenly distributed and concentrated towards
"sampling few packets".
For example, we can express a probability of 2.32E-8% but we
cannot express anything between 100% and 50%.

For sampling applications that are capable of sampling a decent
amount of packets, this sampling rate semantics is not very useful.

Add a new flag to the uAPI that indicates that the sampling rate is
expressed in scaled probability, this is:
- 0 is 0% probability, no packets get sampled.
- U32_MAX is 100% probability, all packets get sampled.

Signed-off-by: Adrian Moreno 
---
 include/net/psample.h|  3 ++-
 include/uapi/linux/psample.h | 10 +-
 net/psample/psample.c|  3 +++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/include/net/psample.h b/include/net/psample.h
index 2ac71260a546..c52e9ebd88dd 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -24,7 +24,8 @@ struct psample_metadata {
u8 out_tc_valid:1,
   out_tc_occ_valid:1,
   latency_valid:1,
-  unused:5;
+  rate_as_probability:1,
+  unused:4;
const u8 *user_cookie;
u32 user_cookie_len;
 };
diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h
index e80637e1d97b..b765f0e81f20 100644
--- a/include/uapi/linux/psample.h
+++ b/include/uapi/linux/psample.h
@@ -8,7 +8,11 @@ enum {
PSAMPLE_ATTR_ORIGSIZE,
PSAMPLE_ATTR_SAMPLE_GROUP,
PSAMPLE_ATTR_GROUP_SEQ,
-   PSAMPLE_ATTR_SAMPLE_RATE,
+   PSAMPLE_ATTR_SAMPLE_RATE,   /* u32, ratio between observed and
+* sampled packets or scaled probability
+* if PSAMPLE_ATTR_SAMPLE_PROBABILITY
+* is set.
+*/
PSAMPLE_ATTR_DATA,
PSAMPLE_ATTR_GROUP_REFCOUNT,
PSAMPLE_ATTR_TUNNEL,
@@ -20,6 +24,10 @@ enum {
PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */
PSAMPLE_ATTR_PROTO, /* u16 */
PSAMPLE_ATTR_USER_COOKIE,   /* binary, user provided data */
+   PSAMPLE_ATTR_SAMPLE_PROBABILITY,/* no argument, interpret rate in
+* PSAMPLE_ATTR_SAMPLE_RATE as a
+* probability scaled 0 - U32_MAX.
+*/
 
__PSAMPLE_ATTR_MAX
 };
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 1c76f3e48dcd..f48b5b9cd409 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -497,6 +497,9 @@ void psample_sample_packet(struct psample_group *group, 
struct sk_buff *skb,
md->user_cookie))
goto error;
 
+   if (md->rate_as_probability)
+   nla_put_flag(skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY);
+
genlmsg_end(nl_skb, data);
genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC);
-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 03/10] net: psample: skip packet copy if no listeners

2024-06-25 Thread Adrian Moreno
If nobody is listening on the multicast group, generating the sample,
which involves copying packet data, seems completely unnecessary.

Return fast in this case.

Reviewed-by: Simon Horman 
Signed-off-by: Adrian Moreno 
---
 net/psample/psample.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/net/psample/psample.c b/net/psample/psample.c
index b37488f426bc..1c76f3e48dcd 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -376,6 +376,10 @@ void psample_sample_packet(struct psample_group *group, 
struct sk_buff *skb,
void *data;
int ret;
 
+   if (!genl_has_listeners(&psample_nl_family, group->net,
+   PSAMPLE_NL_MCGRP_SAMPLE))
+   return;
+
meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) +
   (out_ifindex ? nla_total_size(sizeof(u16)) : 0) +
   (md->out_tc_valid ? nla_total_size(sizeof(u16)) : 0) +
-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 01/10] net: psample: add user cookie

2024-06-25 Thread Adrian Moreno
Add a user cookie to the sample metadata so that sample emitters can
provide more contextual information to samples.

If present, send the user cookie in a new attribute:
PSAMPLE_ATTR_USER_COOKIE.

Reviewed-by: Simon Horman 
Signed-off-by: Adrian Moreno 
---
 include/net/psample.h| 2 ++
 include/uapi/linux/psample.h | 1 +
 net/psample/psample.c| 9 -
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/net/psample.h b/include/net/psample.h
index 0509d2d6be67..2ac71260a546 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -25,6 +25,8 @@ struct psample_metadata {
   out_tc_occ_valid:1,
   latency_valid:1,
   unused:5;
+   const u8 *user_cookie;
+   u32 user_cookie_len;
 };
 
 struct psample_group *psample_group_get(struct net *net, u32 group_num);
diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h
index e585db5bf2d2..e80637e1d97b 100644
--- a/include/uapi/linux/psample.h
+++ b/include/uapi/linux/psample.h
@@ -19,6 +19,7 @@ enum {
PSAMPLE_ATTR_LATENCY,   /* u64, nanoseconds */
PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */
PSAMPLE_ATTR_PROTO, /* u16 */
+   PSAMPLE_ATTR_USER_COOKIE,   /* binary, user provided data */
 
__PSAMPLE_ATTR_MAX
 };
diff --git a/net/psample/psample.c b/net/psample/psample.c
index a5d9b8446f77..b37488f426bc 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -386,7 +386,9 @@ void psample_sample_packet(struct psample_group *group, 
struct sk_buff *skb,
   nla_total_size(sizeof(u32)) +/* group_num */
   nla_total_size(sizeof(u32)) +/* seq */
   nla_total_size_64bit(sizeof(u64)) +  /* timestamp */
-  nla_total_size(sizeof(u16)); /* protocol */
+  nla_total_size(sizeof(u16)) +/* protocol */
+  (md->user_cookie_len ?
+   nla_total_size(md->user_cookie_len) : 0); /* user cookie */
 
 #ifdef CONFIG_INET
tun_info = skb_tunnel_info(skb);
@@ -486,6 +488,11 @@ void psample_sample_packet(struct psample_group *group, 
struct sk_buff *skb,
}
 #endif
 
+   if (md->user_cookie && md->user_cookie_len &&
+   nla_put(nl_skb, PSAMPLE_ATTR_USER_COOKIE, md->user_cookie_len,
+   md->user_cookie))
+   goto error;
+
genlmsg_end(nl_skb, data);
genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC);
-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 02/10] net: sched: act_sample: add action cookie to sample

2024-06-25 Thread Adrian Moreno
If the action has a user_cookie, pass it along to the sample so it can
be easily identified.

Signed-off-by: Adrian Moreno 
---
 net/sched/act_sample.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index a69b53d54039..2ceb4d141b71 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -167,7 +167,9 @@ TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb,
 {
struct tcf_sample *s = to_sample(a);
struct psample_group *psample_group;
+   u8 cookie_data[TC_COOKIE_MAX_SIZE];
struct psample_metadata md = {};
+   struct tc_cookie *user_cookie;
int retval;
 
tcf_lastuse_update(&s->tcf_tm);
@@ -189,6 +191,16 @@ TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb,
if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
skb_push(skb, skb->mac_len);
 
+   rcu_read_lock();
+   user_cookie = rcu_dereference(a->user_cookie);
+   if (user_cookie) {
+   memcpy(cookie_data, user_cookie->data,
+  user_cookie->len);
+   md.user_cookie = cookie_data;
+   md.user_cookie_len = user_cookie->len;
+   }
+   rcu_read_unlock();
+
md.trunc_size = s->truncate ? s->trunc_size : skb->len;
psample_sample_packet(psample_group, skb, s->rate, &md);
 
-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 00/10] net: openvswitch: Add sample multicasting.

2024-06-25 Thread Adrian Moreno
** Background **
Currently, OVS supports several packet sampling mechanisms (sFlow,
per-bridge IPFIX, per-flow IPFIX). These end up being translated into a
userspace action that needs to be handled by ovs-vswitchd's handler
threads only to be forwarded to some third party application that
will somehow process the sample and provide observability on the
datapath.

A particularly interesting use-case is controller-driven
per-flow IPFIX sampling where the OpenFlow controller can add metadata
to samples (via two 32bit integers) and this metadata is then available
to the sample-collecting system for correlation.

** Problem **
The fact that sampled traffic share netlink sockets and handler thread
time with upcalls, apart from being a performance bottleneck in the
sample extraction itself, can severely compromise the datapath,
yielding this solution unfit for highly loaded production systems.

Users are left with little options other than guessing what sampling
rate will be OK for their traffic pattern and system load and dealing
with the lost accuracy.

Looking at available infrastructure, an obvious candidated would be
to use psample. However, it's current state does not help with the
use-case at stake because sampled packets do not contain user-defined
metadata.

** Proposal **
This series is an attempt to fix this situation by extending the
existing psample infrastructure to carry a variable length
user-defined cookie.

The main existing user of psample is tc's act_sample. It is also
extended to forward the action's cookie to psample.

Finally, a new OVS action (OVS_SAMPLE_ATTR_EMIT_SAMPLE) is created.
It accepts a group and an optional cookie and uses psample to
multicast the packet and the metadata.

--
v4 -> v5:
- Rebased.
- Removed lefover enum value and wrapped some long lines in selftests.

v3 -> v4:
- Rebased.
- Addressed Jakub's comment on private and unused nla attributes.

v2 -> v3:
- Addressed comments from Simon, Aaron and Ilya.
- Dropped probability propagation in nested sample actions.
- Dropped patch v2's 7/9 in favor of a userspace implementation and
consume skb if emit_sample is the last action, same as we do with
userspace.
- Split ovs-dpctl.py features in independent patches.

v1 -> v2:
- Create a new action ("emit_sample") rather than reuse existing
  "sample" one.
- Add probability semantics to psample's sampling rate.
- Store sampling probability in skb's cb area and use it in emit_sample.
- Test combining "emit_sample" with "trunc"
- Drop group_id filtering and tracepoint in psample.

rfc_v2 -> v1:
- Accommodate Ilya's comments.
- Split OVS's attribute in two attributes and simplify internal
handling of psample arguments.
- Extend psample and tc with a user-defined cookie.
- Add a tracepoint to psample to facilitate troubleshooting.

rfc_v1 -> rfc_v2:
- Use psample instead of a new OVS-only multicast group.
- Extend psample and tc with a user-defined cookie.

Adrian Moreno (10):
  net: psample: add user cookie
  net: sched: act_sample: add action cookie to sample
  net: psample: skip packet copy if no listeners
  net: psample: allow using rate as probability
  net: openvswitch: add emit_sample action
  net: openvswitch: store sampling probability in cb.
  selftests: openvswitch: add emit_sample action
  selftests: openvswitch: add userspace parsing
  selftests: openvswitch: parse trunc action
  selftests: openvswitch: add emit_sample test

 Documentation/netlink/specs/ovs_flow.yaml |  17 ++
 include/net/psample.h |   5 +-
 include/uapi/linux/openvswitch.h  |  31 +-
 include/uapi/linux/psample.h  |  11 +-
 net/openvswitch/Kconfig   |   1 +
 net/openvswitch/actions.c |  63 +++-
 net/openvswitch/datapath.h|   3 +
 net/openvswitch/flow_netlink.c|  33 ++-
 net/openvswitch/vport.c   |   1 +
 net/psample/psample.c |  16 +-
 net/sched/act_sample.c|  12 +
 .../selftests/net/openvswitch/openvswitch.sh  | 114 +++-
 .../selftests/net/openvswitch/ovs-dpctl.py| 272 +-
 13 files changed, 563 insertions(+), 16 deletions(-)

-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v4 04/10] net: psample: allow using rate as probability

2024-06-25 Thread Adrián Moreno
On Tue, Jun 25, 2024 at 01:17:19PM GMT, Paolo Abeni wrote:
> On Fri, 2024-06-21 at 12:10 +0200, Adrian Moreno wrote:
> > diff --git a/include/uapi/linux/tc_act/tc_sample.h 
> > b/include/uapi/linux/tc_act/tc_sample.h
> > index fee1bcc20793..7ee0735e7b38 100644
> > --- a/include/uapi/linux/tc_act/tc_sample.h
> > +++ b/include/uapi/linux/tc_act/tc_sample.h
> > @@ -18,6 +18,7 @@ enum {
> > TCA_SAMPLE_TRUNC_SIZE,
> > TCA_SAMPLE_PSAMPLE_GROUP,
> > TCA_SAMPLE_PAD,
> > +   TCA_SAMPLE_PROBABILITY,
> > __TCA_SAMPLE_MAX
> >  };
> >  #define TCA_SAMPLE_MAX (__TCA_SAMPLE_MAX - 1)
>
> I believe Ilya's comment on v3 is correct, this chunk looks unrelated
> and unneeded. I guess you can drop it? Or am I missing something?
>

Thanks both for spotting it. I'll send v5 without it.

> Thanks,
>
> Paolo
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v3 6/7] selftests: net: Use the provided dpctl rather than the vswitchd for tests.

2024-06-25 Thread Aaron Conole
The current pmtu test infrastucture requires an installed copy of the
ovs-vswitchd userspace.  This means that any automated or constrained
environments may not have the requisite tools to run the tests.  However,
the pmtu tests don't require any special classifier processing.  Indeed
they are only using the vswitchd in the most basic mode - as a NORMAL
switch.

However, the ovs-dpctl kernel utility can now program all the needed basic
flows to allow traffic to traverse the tunnels and provide support for at
least testing some basic pmtu scenarios.  More complicated flow pipelines
can be added to the internal ovs test infrastructure, but that is work for
the future.  For now, enable the most common cases - wide mega flows with
no other prerequisites.

Enhance the pmtu testing to try testing using the internal utility, first.
As a fallback, if the internal utility isn't running, then try with the
ovs-vswitchd userspace tools.

Additionally, make sure that when the pyroute2 package is not available
the ovs-dpctl utility will error out to properly signal an error has
occurred and skip using the internal utility.

Reviewed-by: Stefano Brivio 
Signed-off-by: Aaron Conole 
---
v3: Exit with an error code from ovs-dpctl when the pyroute2 module
is not available.

 .../selftests/net/openvswitch/ovs-dpctl.py|   2 +-
 tools/testing/selftests/net/pmtu.sh   | 145 +++---
 2 files changed, 124 insertions(+), 23 deletions(-)

diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py 
b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index cb3b910bbc4a..182a09975975 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -34,7 +34,7 @@ try:
 
 except ModuleNotFoundError:
 print("Need to install the python pyroute2 package >= 0.6.")
-sys.exit(0)
+sys.exit(1)
 
 
 OVS_DATAPATH_FAMILY = "ovs_datapath"
diff --git a/tools/testing/selftests/net/pmtu.sh 
b/tools/testing/selftests/net/pmtu.sh
index cfc84958025a..5175c0c83a23 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -842,25 +842,97 @@ setup_bridge() {
run_cmd ${ns_a} ip link set veth_A-C master br0
 }
 
+setup_ovs_via_internal_utility() {
+   type="${1}"
+   a_addr="${2}"
+   b_addr="${3}"
+   dport="${4}"
+
+   run_cmd python3 ./openvswitch/ovs-dpctl.py add-if ovs_br0 ${type}_a -t 
${type} || return 1
+
+   ports=$(python3 ./openvswitch/ovs-dpctl.py show)
+   br0_port=$(echo "$ports" | grep -E "\sovs_br0" | sed -e 's@port @@' | 
cut -d: -f1 | xargs)
+   type_a_port=$(echo "$ports" | grep ${type}_a | sed -e 's@port @@' | cut 
-d: -f1 | xargs)
+   veth_a_port=$(echo "$ports" | grep veth_A | sed -e 's@port @@' | cut 
-d: -f1 | xargs)
+
+   v4_a_tun="${prefix4}.${a_r1}.1"
+   v4_b_tun="${prefix4}.${b_r1}.1"
+
+   v6_a_tun="${prefix6}:${a_r1}::1"
+   v6_b_tun="${prefix6}:${b_r1}::1"
+
+   if [ "${v4_a_tun}" = "${a_addr}" ]; then
+   run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+   
"recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0800),ipv4()" \
+   
"set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}"
+   run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+   
"recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x86dd),ipv6()" \
+   
"set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}"
+   run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+   
"recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0800),ipv4()"
 \
+   "${veth_a_port}"
+   run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+   
"recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x86dd),ipv6()"
 \
+   "${veth_a_port}"
+   run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+   
"recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0806),arp()"
 \
+   "${veth_a_port}"
+   run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+   
"recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0806),arp(sip=${veth4_c_addr},tip=${tunnel4_b_addr})"
 \
+   
"set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}"
+   else
+   run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+   
"recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0800),ipv4()" \
+   
"set(tunnel(tun_id=1,ipv6_dst=${v6_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_

[ovs-dev] [PATCH net-next v3 7/7] selftests: net: add config for openvswitch

2024-06-25 Thread Aaron Conole
The pmtu testing will require that the OVS module is installed,
so do that.

Reviewed-by: Simon Horman 
Tested-by: Simon Horman 
Signed-off-by: Aaron Conole 
---
 tools/testing/selftests/net/config | 5 +
 1 file changed, 5 insertions(+)

diff --git a/tools/testing/selftests/net/config 
b/tools/testing/selftests/net/config
index d4891f7a2bfa..cf43a1d07046 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -103,3 +103,8 @@ CONFIG_XFRM_INTERFACE=m
 CONFIG_XFRM_USER=m
 CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_OPENVSWITCH=m
+CONFIG_OPENVSWITCH_GRE=m
+CONFIG_OPENVSWITCH_VXLAN=m
+CONFIG_OPENVSWITCH_GENEVE=m
+CONFIG_NF_CONNTRACK_OVS=y
-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v3 5/7] selftests: openvswitch: Support implicit ipv6 arguments.

2024-06-25 Thread Aaron Conole
The current iteration of IPv6 support requires explicit fields to be set
in addition to not properly support the actual IPv6 addresses properly.
With this change, make it so that the ipv6() bare option is usable to
create wildcarded flows to match broad swaths of ipv6 traffic.

Reviewed-by: Simon Horman 
Tested-by: Simon Horman 
Signed-off-by: Aaron Conole 
---
v2: Drop the change in the v6 key class that removed the
str() conversion

 .../selftests/net/openvswitch/ovs-dpctl.py| 36 ---
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py 
b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index db8163103406..cb3b910bbc4a 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -200,6 +200,18 @@ def convert_ipv4(data):
 
 return int(ipaddress.IPv4Address(ip)), int(ipaddress.IPv4Address(mask))
 
+def convert_ipv6(data):
+ip, _, mask = data.partition('/')
+
+if not ip:
+ip = mask = 0
+elif not mask:
+mask = ':::::::'
+elif mask.isdigit():
+mask = ipaddress.IPv6Network("::/" + mask).hostmask
+
+return ipaddress.IPv6Address(ip).packed, ipaddress.IPv6Address(mask).packed
+
 def convert_int(size):
 def convert_int_sized(data):
 value, _, mask = data.partition('/')
@@ -941,21 +953,21 @@ class ovskey(nla):
 "src",
 "src",
 lambda x: str(ipaddress.IPv6Address(x)),
-lambda x: int.from_bytes(x, "big"),
-lambda x: ipaddress.IPv6Address(x),
+lambda x: ipaddress.IPv6Address(x).packed if x else 0,
+convert_ipv6,
 ),
 (
 "dst",
 "dst",
 lambda x: str(ipaddress.IPv6Address(x)),
-lambda x: int.from_bytes(x, "big"),
-lambda x: ipaddress.IPv6Address(x),
+lambda x: ipaddress.IPv6Address(x).packed if x else 0,
+convert_ipv6,
 ),
-("label", "label", "%d", int),
-("proto", "proto", "%d", int),
-("tclass", "tclass", "%d", int),
-("hlimit", "hlimit", "%d", int),
-("frag", "frag", "%d", int),
+("label", "label", "%d", lambda x: int(x) if x else 0),
+("proto", "proto", "%d", lambda x: int(x) if x else 0),
+("tclass", "tclass", "%d", lambda x: int(x) if x else 0),
+("hlimit", "hlimit", "%d", lambda x: int(x) if x else 0),
+("frag", "frag", "%d", lambda x: int(x) if x else 0),
 )
 
 def __init__(
@@ -1153,7 +1165,7 @@ class ovskey(nla):
 "target",
 "target",
 lambda x: str(ipaddress.IPv6Address(x)),
-lambda x: int.from_bytes(x, "big"),
+convert_ipv6,
 ),
 ("sll", "sll", macstr, lambda x: int.from_bytes(x, "big")),
 ("tll", "tll", macstr, lambda x: int.from_bytes(x, "big")),
@@ -1238,13 +1250,13 @@ class ovskey(nla):
 "src",
 "src",
 lambda x: str(ipaddress.IPv6Address(x)),
-lambda x: int.from_bytes(x, "big", convertmac),
+convert_ipv6,
 ),
 (
 "dst",
 "dst",
 lambda x: str(ipaddress.IPv6Address(x)),
-lambda x: int.from_bytes(x, "big"),
+convert_ipv6,
 ),
 ("tp_src", "tp_src", "%d", int),
 ("tp_dst", "tp_dst", "%d", int),
-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v3 4/7] selftests: openvswitch: Add support for tunnel() key.

2024-06-25 Thread Aaron Conole
This will be used when setting details about the tunnel to use as
transport.  There is a difference between the ODP format between tunnel():
the 'key' flag is not actually a flag field, so we don't support it in the
same way that the vswitchd userspace supports displaying it.

Signed-off-by: Aaron Conole 
---
 .../selftests/net/openvswitch/ovs-dpctl.py| 167 +-
 1 file changed, 166 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py 
b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 00a8d6c0163b..db8163103406 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -709,7 +709,7 @@ class ovskey(nla):
 ("OVS_KEY_ATTR_ARP", "ovs_key_arp"),
 ("OVS_KEY_ATTR_ND", "ovs_key_nd"),
 ("OVS_KEY_ATTR_SKB_MARK", "uint32"),
-("OVS_KEY_ATTR_TUNNEL", "none"),
+("OVS_KEY_ATTR_TUNNEL", "ovs_key_tunnel"),
 ("OVS_KEY_ATTR_SCTP", "ovs_key_sctp"),
 ("OVS_KEY_ATTR_TCP_FLAGS", "be16"),
 ("OVS_KEY_ATTR_DP_HASH", "uint32"),
@@ -1269,6 +1269,163 @@ class ovskey(nla):
 init=init,
 )
 
+class ovs_key_tunnel(nla):
+nla_flags = NLA_F_NESTED
+
+nla_map = (
+("OVS_TUNNEL_KEY_ATTR_ID", "be64"),
+("OVS_TUNNEL_KEY_ATTR_IPV4_SRC", "ipaddr"),
+("OVS_TUNNEL_KEY_ATTR_IPV4_DST", "ipaddr"),
+("OVS_TUNNEL_KEY_ATTR_TOS", "uint8"),
+("OVS_TUNNEL_KEY_ATTR_TTL", "uint8"),
+("OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT", "flag"),
+("OVS_TUNNEL_KEY_ATTR_CSUM", "flag"),
+("OVS_TUNNEL_KEY_ATTR_OAM", "flag"),
+("OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS", "array(uint32)"),
+("OVS_TUNNEL_KEY_ATTR_TP_SRC", "be16"),
+("OVS_TUNNEL_KEY_ATTR_TP_DST", "be16"),
+("OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS", "none"),
+("OVS_TUNNEL_KEY_ATTR_IPV6_SRC", "ipaddr"),
+("OVS_TUNNEL_KEY_ATTR_IPV6_DST", "ipaddr"),
+("OVS_TUNNEL_KEY_ATTR_PAD", "none"),
+("OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS", "none"),
+("OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE", "flag"),
+)
+
+def parse(self, flowstr, mask=None):
+if not flowstr.startswith("tunnel("):
+return None, None
+
+k = ovskey.ovs_key_tunnel()
+if mask is not None:
+mask = ovskey.ovs_key_tunnel()
+
+flowstr = flowstr[len("tunnel("):]
+
+v6_address = None
+
+fields = [
+("tun_id=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_ID",
+ 0x, None, None),
+
+("src=", r"([0-9a-fA-F\.]+)", str,
+ "OVS_TUNNEL_KEY_ATTR_IPV4_SRC", "255.255.255.255", "0.0.0.0",
+ False),
+("dst=", r"([0-9a-fA-F\.]+)", str,
+ "OVS_TUNNEL_KEY_ATTR_IPV4_DST", "255.255.255.255", "0.0.0.0",
+ False),
+
+("ipv6_src=", r"([0-9a-fA-F:]+)", str,
+ "OVS_TUNNEL_KEY_ATTR_IPV6_SRC",
+ ":::::::", "::", True),
+("ipv6_dst=", r"([0-9a-fA-F:]+)", str,
+ "OVS_TUNNEL_KEY_ATTR_IPV6_DST",
+ ":::::::", "::", True),
+
+("tos=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TOS", 255, 0,
+ None),
+("ttl=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TTL", 255, 0,
+ None),
+
+("tp_src=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TP_SRC",
+ 65535, 0, None),
+("tp_dst=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TP_DST",
+ 65535, 0, None),
+]
+
+forced_include = ["OVS_TUNNEL_KEY_ATTR_TTL"]
+
+for prefix, regex, typ, attr_name, mask_val, default_val, v46_flag 
in fields:
+flowstr, value = parse_extract_field(flowstr, prefix, regex, 
typ, False)
+if not attr_name:
+raise Exception("Bad list value in tunnel fields")
+
+if value is None and attr_name in forced_include:
+value = default_val
+mask_val = default_val
+
+if value is not None:
+if v46_flag is not None:
+if v6_address is None:
+v6_address = v46_flag
+if v46_flag != v6_address:
+raise ValueError("Cannot mix v6 and v4 addresses")
+k["attrs"].append([attr_name, value])
+if mask is not None:
+mask["attrs"].append([attr_name, mask_val])
+else:
+if v46_flag is not None:
+if v6_address is None or v46_flag != v6_address:
+   

[ovs-dev] [PATCH net-next v3 3/7] selftests: openvswitch: Add set() and set_masked() support.

2024-06-25 Thread Aaron Conole
These will be used in upcoming commits to set specific attributes for
interacting with tunnels.  Since set() will use the key parsing routine, we
also make sure to prepend it with an open paren, for the action parsing to
properly understand it.

Reviewed-by: Simon Horman 
Tested-by: Simon Horman 
Signed-off-by: Aaron Conole 
---
 .../selftests/net/openvswitch/ovs-dpctl.py| 37 +--
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py 
b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 6bbe15daca74..00a8d6c0163b 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -284,7 +284,7 @@ class ovsactions(nla):
 ("OVS_ACTION_ATTR_UNSPEC", "none"),
 ("OVS_ACTION_ATTR_OUTPUT", "uint32"),
 ("OVS_ACTION_ATTR_USERSPACE", "userspace"),
-("OVS_ACTION_ATTR_SET", "none"),
+("OVS_ACTION_ATTR_SET", "ovskey"),
 ("OVS_ACTION_ATTR_PUSH_VLAN", "none"),
 ("OVS_ACTION_ATTR_POP_VLAN", "flag"),
 ("OVS_ACTION_ATTR_SAMPLE", "none"),
@@ -292,7 +292,7 @@ class ovsactions(nla):
 ("OVS_ACTION_ATTR_HASH", "none"),
 ("OVS_ACTION_ATTR_PUSH_MPLS", "none"),
 ("OVS_ACTION_ATTR_POP_MPLS", "flag"),
-("OVS_ACTION_ATTR_SET_MASKED", "none"),
+("OVS_ACTION_ATTR_SET_MASKED", "ovskey"),
 ("OVS_ACTION_ATTR_CT", "ctact"),
 ("OVS_ACTION_ATTR_TRUNC", "uint32"),
 ("OVS_ACTION_ATTR_PUSH_ETH", "none"),
@@ -469,6 +469,18 @@ class ovsactions(nla):
 print_str += "clone("
 print_str += datum.dpstr(more)
 print_str += ")"
+elif field[0] == "OVS_ACTION_ATTR_SET" or \
+ field[0] == "OVS_ACTION_ATTR_SET_MASKED":
+print_str += "set"
+field = datum
+mask = None
+if field[0] == "OVS_ACTION_ATTR_SET_MASKED":
+print_str += "_masked"
+field = datum[0]
+mask = datum[1]
+print_str += "("
+print_str += field.dpstr(mask, more)
+print_str += ")"
 else:
 try:
 print_str += datum.dpstr(more)
@@ -547,6 +559,25 @@ class ovsactions(nla):
 self["attrs"].append(("OVS_ACTION_ATTR_CLONE", subacts))
 actstr = actstr[parsedLen:]
 parsed = True
+elif parse_starts_block(actstr, "set(", False):
+parencount += 1
+k = ovskey()
+actstr = actstr[len("set("):]
+actstr = k.parse(actstr, None)
+self["attrs"].append(("OVS_ACTION_ATTR_SET", k))
+if not actstr.startswith(")"):
+actstr = ")" + actstr
+parsed = True
+elif parse_starts_block(actstr, "set_masked(", False):
+parencount += 1
+k = ovskey()
+m = ovskey()
+actstr = actstr[len("set_masked("):]
+actstr = k.parse(actstr, m)
+self["attrs"].append(("OVS_ACTION_ATTR_SET_MASKED", [k, m]))
+if not actstr.startswith(")"):
+actstr = ")" + actstr
+parsed = True
 elif parse_starts_block(actstr, "ct(", False):
 parencount += 1
 actstr = actstr[len("ct(") :]
@@ -1312,7 +1343,7 @@ class ovskey(nla):
 mask["attrs"].append([field[0], m])
 self["attrs"].append([field[0], k])
 
-flowstr = flowstr[strspn(flowstr, "),") :]
+flowstr = flowstr[strspn(flowstr, "), ") :]
 
 return flowstr
 
-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v3 2/7] selftests: openvswitch: Refactor actions parsing.

2024-06-25 Thread Aaron Conole
Until recently, the ovs-dpctl utility was used with a limited actions set
and didn't need to have support for multiple similar actions.  However,
when adding support for tunnels, it will be important to support multiple
set() actions in a single flow.  When printing these actions, the existing
code will be unable to print all of the sets - it will only print the
first.

Refactor this code to be easier to read and support multiple actions of the
same type in an action list.

Reviewed-by: Simon Horman 
Tested-by: Simon Horman 
Signed-off-by: Aaron Conole 
---
 .../selftests/net/openvswitch/ovs-dpctl.py| 45 ++-
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py 
b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 02e44c6f0503..6bbe15daca74 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -439,32 +439,30 @@ class ovsactions(nla):
 def dpstr(self, more=False):
 print_str = ""
 
-for field in self.nla_map:
+for field in self["attrs"]:
 if field[1] == "none" or self.get_attr(field[0]) is None:
 continue
 if print_str != "":
 print_str += ","
 
-if field[1] == "uint32":
-if field[0] == "OVS_ACTION_ATTR_OUTPUT":
-print_str += "%d" % int(self.get_attr(field[0]))
-elif field[0] == "OVS_ACTION_ATTR_RECIRC":
-print_str += "recirc(0x%x)" % int(self.get_attr(field[0]))
-elif field[0] == "OVS_ACTION_ATTR_TRUNC":
-print_str += "trunc(%d)" % int(self.get_attr(field[0]))
-elif field[0] == "OVS_ACTION_ATTR_DROP":
-print_str += "drop(%d)" % int(self.get_attr(field[0]))
-elif field[1] == "flag":
-if field[0] == "OVS_ACTION_ATTR_CT_CLEAR":
-print_str += "ct_clear"
-elif field[0] == "OVS_ACTION_ATTR_POP_VLAN":
-print_str += "pop_vlan"
-elif field[0] == "OVS_ACTION_ATTR_POP_ETH":
-print_str += "pop_eth"
-elif field[0] == "OVS_ACTION_ATTR_POP_NSH":
-print_str += "pop_nsh"
-elif field[0] == "OVS_ACTION_ATTR_POP_MPLS":
-print_str += "pop_mpls"
+if field[0] == "OVS_ACTION_ATTR_OUTPUT":
+print_str += "%d" % int(self.get_attr(field[0]))
+elif field[0] == "OVS_ACTION_ATTR_RECIRC":
+print_str += "recirc(0x%x)" % int(self.get_attr(field[0]))
+elif field[0] == "OVS_ACTION_ATTR_TRUNC":
+print_str += "trunc(%d)" % int(self.get_attr(field[0]))
+elif field[0] == "OVS_ACTION_ATTR_DROP":
+print_str += "drop(%d)" % int(self.get_attr(field[0]))
+elif field[0] == "OVS_ACTION_ATTR_CT_CLEAR":
+print_str += "ct_clear"
+elif field[0] == "OVS_ACTION_ATTR_POP_VLAN":
+print_str += "pop_vlan"
+elif field[0] == "OVS_ACTION_ATTR_POP_ETH":
+print_str += "pop_eth"
+elif field[0] == "OVS_ACTION_ATTR_POP_NSH":
+print_str += "pop_nsh"
+elif field[0] == "OVS_ACTION_ATTR_POP_MPLS":
+print_str += "pop_mpls"
 else:
 datum = self.get_attr(field[0])
 if field[0] == "OVS_ACTION_ATTR_CLONE":
@@ -472,7 +470,10 @@ class ovsactions(nla):
 print_str += datum.dpstr(more)
 print_str += ")"
 else:
-print_str += datum.dpstr(more)
+try:
+print_str += datum.dpstr(more)
+except:
+print_str += "{ATTR: %s not decoded}" % field[0]
 
 return print_str
 
-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v3 0/7] selftests: net: Switch pmtu.sh to use the internal ovs script.

2024-06-25 Thread Aaron Conole
Currently, if a user wants to run pmtu.sh and cover all the provided test
cases, they need to install the Open vSwitch userspace utilities.  This
dependency is difficult for users as well as CI environments, because the
userspace build and setup may require lots of support and devel packages
to be installed, system setup to be correct, and things like permissions
and selinux policies to be properly configured.

The kernel selftest suite includes an ovs-dpctl.py utility which can
interact with the openvswitch module directly.  This lets developers and
CI environments run without needing too many extra dependencies - just
the pyroute2 python package.

This series enhances the ovs-dpctl utility to provide support for set()
and tunnel() flow specifiers, better ipv6 handling support, and the
ability to add tunnel vports, and LWT interfaces.  Finally, it modifies
the pmtu.sh script to call the ovs-dpctl.py utility rather than the
typical OVS userspace utilities.  The pmtu.sh can still fall back on
the Open vSwitch userspace utilities if the ovs-dpctl.py script can't
be used.

Aaron Conole (7):
  selftests: openvswitch: Support explicit tunnel port creation.
  selftests: openvswitch: Refactor actions parsing.
  selftests: openvswitch: Add set() and set_masked() support.
  selftests: openvswitch: Add support for tunnel() key.
  selftests: openvswitch: Support implicit ipv6 arguments.
  selftests: net: Use the provided dpctl rather than the vswitchd for
tests.
  selftests: net: add config for openvswitch

 tools/testing/selftests/net/config|   5 +
 .../selftests/net/openvswitch/ovs-dpctl.py| 368 +++---
 tools/testing/selftests/net/pmtu.sh   | 145 +--
 3 files changed, 451 insertions(+), 67 deletions(-)

-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v3 1/7] selftests: openvswitch: Support explicit tunnel port creation.

2024-06-25 Thread Aaron Conole
The OVS module can operate in conjunction with various types of
tunnel ports.  These are created as either explicit tunnel vport
types, OR by creating a tunnel interface which acts as an anchor
for the lightweight tunnel support.

This patch adds the ability to add tunnel ports to an OVS
datapath for testing various scenarios with tunnel ports.  With
this addition, the vswitch "plumbing" will at least be able to
push packets around using the tunnel vports.  Future patches
will add support for setting required tunnel metadata for lwts
in the datapath.  The end goal will be to push packets via these
tunnels, and will be used in an upcoming commit for testing the
path MTU.

Reviewed-by: Simon Horman 
Tested-by: Simon Horman 
Signed-off-by: Aaron Conole 
---
 .../selftests/net/openvswitch/ovs-dpctl.py| 81 +--
 1 file changed, 75 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py 
b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 9f8dec2f6539..02e44c6f0503 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -10,6 +10,7 @@ import ipaddress
 import logging
 import multiprocessing
 import re
+import socket
 import struct
 import sys
 import time
@@ -29,6 +30,7 @@ try:
 from pyroute2.netlink.exceptions import NetlinkError
 from pyroute2.netlink.generic import GenericNetlinkSocket
 import pyroute2
+import pyroute2.iproute
 
 except ModuleNotFoundError:
 print("Need to install the python pyroute2 package >= 0.6.")
@@ -1617,7 +1619,7 @@ class OvsVport(GenericNetlinkSocket):
 ("OVS_VPORT_ATTR_PORT_NO", "uint32"),
 ("OVS_VPORT_ATTR_TYPE", "uint32"),
 ("OVS_VPORT_ATTR_NAME", "asciiz"),
-("OVS_VPORT_ATTR_OPTIONS", "none"),
+("OVS_VPORT_ATTR_OPTIONS", "vportopts"),
 ("OVS_VPORT_ATTR_UPCALL_PID", "array(uint32)"),
 ("OVS_VPORT_ATTR_STATS", "vportstats"),
 ("OVS_VPORT_ATTR_PAD", "none"),
@@ -1625,6 +1627,13 @@ class OvsVport(GenericNetlinkSocket):
 ("OVS_VPORT_ATTR_NETNSID", "uint32"),
 )
 
+class vportopts(nla):
+nla_map = (
+("OVS_TUNNEL_ATTR_UNSPEC", "none"),
+("OVS_TUNNEL_ATTR_DST_PORT", "uint16"),
+("OVS_TUNNEL_ATTR_EXTENSION", "none"),
+)
+
 class vportstats(nla):
 fields = (
 ("rx_packets", "=Q"),
@@ -1693,7 +1702,7 @@ class OvsVport(GenericNetlinkSocket):
 raise ne
 return reply
 
-def attach(self, dpindex, vport_ifname, ptype):
+def attach(self, dpindex, vport_ifname, ptype, dport, lwt):
 msg = OvsVport.ovs_vport_msg()
 
 msg["cmd"] = OVS_VPORT_CMD_NEW
@@ -1702,12 +1711,43 @@ class OvsVport(GenericNetlinkSocket):
 msg["dpifindex"] = dpindex
 port_type = OvsVport.str_to_type(ptype)
 
-msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type])
 msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname])
 msg["attrs"].append(
 ["OVS_VPORT_ATTR_UPCALL_PID", [self.upcall_packet.epid]]
 )
 
+TUNNEL_DEFAULTS = [("geneve", 6081),
+   ("vxlan", 4789)]
+
+for tnl in TUNNEL_DEFAULTS:
+if ptype == tnl[0]:
+if not dport:
+dport = tnl[1]
+
+if not lwt:
+vportopt = OvsVport.ovs_vport_msg.vportopts()
+vportopt["attrs"].append(
+["OVS_TUNNEL_ATTR_DST_PORT", socket.htons(dport)]
+)
+msg["attrs"].append(
+["OVS_VPORT_ATTR_OPTIONS", vportopt]
+)
+else:
+port_type = OvsVport.OVS_VPORT_TYPE_NETDEV
+ipr = pyroute2.iproute.IPRoute()
+
+if tnl[0] == "geneve":
+ipr.link("add", ifname=vport_ifname, kind=tnl[0],
+ geneve_port=dport,
+ geneve_collect_metadata=True,
+ geneve_udp_zero_csum6_rx=1)
+elif tnl[0] == "vxlan":
+ipr.link("add", ifname=vport_ifname, kind=tnl[0],
+ vxlan_learning=0, vxlan_collect_metadata=1,
+ vxlan_udp_zero_csum6_rx=1, vxlan_port=dport)
+break
+msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type])
+
 try:
 reply = self.nlm_request(
 msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
@@ -2053,12 +2093,19 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), 
vpl=OvsVport()):
 for iface in ndb.interfaces:
 rep = vpl.info(iface.ifname, ifindex)
 if rep is not None:
+opts =

Re: [ovs-dev] OVN technical community meeting - June 24th

2024-06-25 Thread Frode Nordahl
Hello,

On Tue, Jun 25, 2024 at 6:04 PM Dumitru Ceara  wrote:
>
> Hi,
>
> Thanks to everyone who attended the OVN meeting yesterday!
>
> The recording is available here:
> https://www.youtube.com/watch?v=bonLhUj2oGg
>
> Transcripts:
> https://drive.google.com/file/d/1ioVXcHiuD-xr5RQTh5OSGbpmhUtIDiO_/view?usp=sharing
>
> Notes:
> https://docs.google.com/document/d/1dG4GwcYOSs4uArPGtOoaP5tH4KCto-GH_C3tIXSnZZ8/

Thank you for organizing, it was a good discussion! As promised I also
sent out an e-mail detailing some of the points we spoke about, I
suggest we continue the discussion there [0].

> The main discussion topic was BGP integration in OVN and mostly about
> where to run the BGP control plane (e.g., FRR on the side vs an in-tree
> BGP implementation) and how to get packets to it.  I'll try to list some
> points here:
>
> Frode suggested a "L2 load balancer" feature to be added to OVN to
> "sniff" BGP control packets reaching a logical router and send them out
> (through a VIF?) to FRR running alongside OVN on the host.
>
> Ilya also provided an alternative of using a new type of logical router
> port, similar to a loopback interface and bind BGP configuration to that
> one.
>
> I had an off-list discussion with Tim Rozet (in CC) from the OpenShift
> networking team in Red Hat and he shared an idea that might be
> interesting because it kind of combines the two above.  That is:
>
> - configure a TCP load balancer on the logical router we want FRR to run
> BGP for
> - this load balancer will "balance" (DNAT) traffic to a single backend
> IP which can be any tenant internal IP reserved for FRR to bind to
> - this IP could be configured on a regular OVN VIF that's further
> plugged into the namespace/container/VM where FRR runs
>
> Frode, would this be a good alternative for your use case?

Using a loopback address is indeed common in many configurations,
however as I see it there are two requirements that conflict with
that. One being the goal of minimizing configuration overhead through
the use of IPv6 LLA ("BGP Unnumbered ''), which inherently are point
to point, the other being support for BGP authentication, which is at
odds with NATing the BGP connection. An indirect blocker for this
would also be how popular BGP implementations do not allow setting a
next hop for routes transmitted over a IPv6 LLA.

See more detail in [0].

> In any case, if we end up having to add a new feature to explicitly
> handle BGP configuration (the "L2 load balancer" discussed in the
> meeting), I think my preference would be in the end to have an explicit
> "bgp" configuration in the northbound database.  We already do that for
> other features: DHCP, IGMP, multicast routing.

Ack, thank you for stating your preference here!

> Finally, I'd like to propose a new technical meeting instance in
> approximately 5 weeks.  I'm looking at:
>
> Monday, July 29th, 3PM UTC.
>
> I'll wait a few days before sending out the invite in case people raise
> objections.

Works for me!

0: https://mail.openvswitch.org/pipermail/ovs-dev/2024-June/415033.html

> Best regards,
> Dumitru Ceara
>
> On 6/21/24 11:05, Dumitru Ceara wrote:
> > Hi everyone,
> >
> > Just a small reminder for anyone interested in joining the next OVN
> > technical meeting.  It's scheduled to happen next week on Monday June
> > 24th at 3PM UTC.
> >
> > Meeting details:
> > Date/Time: Monday, June 24th, 3PM UTC
> > Link: meet.google.com/zns-gqsd-jdn
> > Agenda: 
> > https://docs.google.com/document/d/1dG4GwcYOSs4uArPGtOoaP5tH4KCto-GH_C3tIXSnZZ8/edit?usp=meetingnotes
> >
> > Please feel free to add any items you'd like to discuss to the agenda.
> > I added some of the things I'm aware of there already.
> >
> > Best regards,
> > Dumitru
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] RFC OVN: fabric integration

2024-06-25 Thread Frode Nordahl
Hello,

We are increasingly seeing requests for integration between OVN
powered CMSs/workloads and the fabric.

As a side note, this is a very interesting topic to me personally, and
I think there are opportunities in the long term for this class of
software to potentially fill a void for more automated and SDN-like
ways of managing the physical network, as previously closed physical
switch hardware is increasingly opening up to programmatic extension
and control.

While very exciting, it will take a while, both in terms of evolving
how networking teams are organized, in terms of the longevity of
networking gear making entity wide refresh cycles very long, not to
mention gathering agreement and momentum to build such a thing from
the pieces we have.


So to be pragmatic, we need to integrate with something that fabric
network engineers are comfortable with, and already available on most
networking hardware, be it closed or open, today.

The most ubiquitous routing protocol, which has prevailed in modern
layer 3 only data center designs [0], is BGP.

Use cases:
* Allow fabric to locate and direct traffic to reroutable resources
such as IPv4/IPv6 prefixes, Floating IPs (FIPs) and Load Balancer
VIPs.

* Use the fabric as a load balancer, announcing the same service IP on
multiple hosts (anycast).

* Aggregate announcements from stacked CMSes (i.e. Kubernetes running
on top of OpenStack).


Requirements:
* Data path must be hardware offloaded, i.e. the next hop address the
peer resolves for announcements of OVN resources needs to be an LRP
IP.

* Minimize configuration overhead through the use of IPv6 LLAs for
peering routing both IPv4 and IPv6 prefixes over a IPv6 BGP session
[1] (aka. “BGP Unnumbered”).

* Support ECMP out of the host, i.e. use L3 interfaces potentially
connecting to two different ToRs, instead of bonds, avoiding the
additional complexity of multi-chassis bonds.

* Support BGP authentication [2][3], i.e. the source, destination
address and ports in packet headers can not be changed.

* Compatibility
   * Running a BGP protocol suite on the host is becoming a thing in
its own right, and our users may have requirements of their own that
influence their choice of implementation. We need to take this into
account and choose integration methods that allow OVN to work with
multiple protocol suite implementations.

   * While we have the power to change and fix issues in popular
routing protocol suites, such as FRR, we need to be able to integrate
with versions that exist on networking hardware out there today.

Limitations that influence/dictate implementation choices:
* Peering with IPv6 LLAs to meet the configuration overhead
requirement makes the peering relationship point to point.

* Popular BGP implementations, such as FRR which is used as routing
protocol suite by many ToR open source NOSes, does not accept
sending/receiving IPv6 LLA next hop with the route, so the BGP peer
address will be used as next hop. (There are even mentions of 3rd
party nexthop currently not being supported, but not sure if that is
accurate [4]).

* As mentioned above, BGP authentication requires IP headers to be
unchanged for the BGP TCP packets going to/from the BGP speaker.


Proposed implementation:

We are in the process of preparing some RFC/PoC patches that at a high
level will:
* Manage a VRF in the system serving two purposes:
   * Leaking of route information from ovn-controller to the VRF
routing table, which a routing protocol suite can redistribute subject
to configuration.

   * Provide an IP endpoint that a VRF aware application, such as FRR,
can bind to serving as a BGP speaker on behalf of a OVN LRP IP.

* We will attach a OVN VIF to this VRF that has data path rules that:

   * Forward required traffic destined to the OVN LRP IP to the VRF.

   * Forward required traffic from the application bound to the VRF as
if it originated from the OVN LRP IP.


Hopefully we'll have something up on the list before the end of this
week, which makes it real and easier to reason about for further
discussion.


Prior art:

We recognize that there already exists a third party approach to this
in the ovn-bgp-agent [5] governed by OpenStack, and our goal with this
work is to provide a tighter integration that might cater generically
for other CMSes and use cases.


0: https://datatracker.ietf.org/doc/html/rfc7938
1: https://datatracker.ietf.org/doc/html/rfc5549
2: https://datatracker.ietf.org/doc/html/rfc2385
3: https://datatracker.ietf.org/doc/html/rfc5925
4: 
https://github.com/FRRouting/frr/blob/cc3519f3e6eaa06f762e0d447202df32df66e129/bgpd/bgp_route.c#L2719
5: https://docs.openstack.org/ovn-bgp-agent/latest/
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 net-next 0/7] selftests: net: Switch pmtu.sh to use the internal ovs script.

2024-06-25 Thread Jakub Kicinski
On Tue, 25 Jun 2024 11:17:14 -0400 Aaron Conole wrote:
> > BTW I popped the v2 back into the queue, so the next run (in 20min)
> > will tell us if that's the only thing we were missing 🤞️  
> 
> :)  I'll wait to post the v3 then.  So far, the only change I have is:
> 
> --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
> +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
> @@ -34,7 +34,7 @@ try:
>  
>  except ModuleNotFoundError:
>  print("Need to install the python pyroute2 package >= 0.6.")
> -sys.exit(0)
> +sys.exit(1)

Looks like it didn't get re-ingested :( please go ahead with the v3
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVN technical community meeting - June 24th

2024-06-25 Thread Dumitru Ceara
Hi,

Thanks to everyone who attended the OVN meeting yesterday!

The recording is available here:
https://www.youtube.com/watch?v=bonLhUj2oGg

Transcripts:
https://drive.google.com/file/d/1ioVXcHiuD-xr5RQTh5OSGbpmhUtIDiO_/view?usp=sharing

Notes:
https://docs.google.com/document/d/1dG4GwcYOSs4uArPGtOoaP5tH4KCto-GH_C3tIXSnZZ8/

The main discussion topic was BGP integration in OVN and mostly about
where to run the BGP control plane (e.g., FRR on the side vs an in-tree
BGP implementation) and how to get packets to it.  I'll try to list some
points here:

Frode suggested a "L2 load balancer" feature to be added to OVN to
"sniff" BGP control packets reaching a logical router and send them out
(through a VIF?) to FRR running alongside OVN on the host.

Ilya also provided an alternative of using a new type of logical router
port, similar to a loopback interface and bind BGP configuration to that
one.

I had an off-list discussion with Tim Rozet (in CC) from the OpenShift
networking team in Red Hat and he shared an idea that might be
interesting because it kind of combines the two above.  That is:

- configure a TCP load balancer on the logical router we want FRR to run
BGP for
- this load balancer will "balance" (DNAT) traffic to a single backend
IP which can be any tenant internal IP reserved for FRR to bind to
- this IP could be configured on a regular OVN VIF that's further
plugged into the namespace/container/VM where FRR runs

Frode, would this be a good alternative for your use case?

In any case, if we end up having to add a new feature to explicitly
handle BGP configuration (the "L2 load balancer" discussed in the
meeting), I think my preference would be in the end to have an explicit
"bgp" configuration in the northbound database.  We already do that for
other features: DHCP, IGMP, multicast routing.

Finally, I'd like to propose a new technical meeting instance in
approximately 5 weeks.  I'm looking at:

Monday, July 29th, 3PM UTC.

I'll wait a few days before sending out the invite in case people raise
objections.

Best regards,
Dumitru Ceara

On 6/21/24 11:05, Dumitru Ceara wrote:
> Hi everyone,
> 
> Just a small reminder for anyone interested in joining the next OVN
> technical meeting.  It's scheduled to happen next week on Monday June
> 24th at 3PM UTC.
> 
> Meeting details:
> Date/Time: Monday, June 24th, 3PM UTC
> Link: meet.google.com/zns-gqsd-jdn
> Agenda: 
> https://docs.google.com/document/d/1dG4GwcYOSs4uArPGtOoaP5tH4KCto-GH_C3tIXSnZZ8/edit?usp=meetingnotes
> 
> Please feel free to add any items you'd like to discuss to the agenda.
> I added some of the things I'm aware of there already.
> 
> Best regards,
> Dumitru

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 net-next 0/7] selftests: net: Switch pmtu.sh to use the internal ovs script.

2024-06-25 Thread Stefano Brivio
On Tue, 25 Jun 2024 07:06:54 -0700
Jakub Kicinski  wrote:

> On Tue, 25 Jun 2024 09:20:29 -0400 Aaron Conole wrote:
> > > I'm still wondering if the issue is Kconfig-related (plus possibly bad
> > > interaction with vng). I don't see the OVS knob enabled in the self-
> > > tests config. If it's implied by some other knob, and ends-up being
> > > selected as a module, vng could stumble upon loading the module at
> > > runtime, especially on incremental build (at least I experience that
> > > problem locally). I'm not even sure if the KCI is building
> > > incrementally or not, so all the above could is quite a wild guess.
> > >
> > > In any case I think adding the explicit CONFIG_OPENVSWITCH=y the
> > > selftest config would make the scenario more well defined.
> > 
> > That is in 7/7 - but there was a collision with a netfilter knob getting
> > turned on.  I can repost it as-is (just after rebasing) if you think
> > that is the only issue.  
> 
> Sorry for not checking it earlier, looks like the runner was missing
> pyroute:
> 
> # python3 ./tools/testing/selftests/net/openvswitch/ovs-dpctl.py
> Need to install the python pyroute2 package >= 0.6.
> 
> I guess run_cmd counter-productively eats the stderr output ? :(

Yes, otherwise it's rather noisy, but you can run the thing with
VERBOSE=1, see also 56490b623aa0 ("selftests: Add debugging options to
pmtu.sh").

Before that change, we didn't eat standard error, but in the general
case I guess it's quite an improvement.

-- 
Stefano

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 net-next 0/7] selftests: net: Switch pmtu.sh to use the internal ovs script.

2024-06-25 Thread Aaron Conole
Jakub Kicinski  writes:

> On Tue, 25 Jun 2024 10:14:24 -0400 Aaron Conole wrote:
>> > Sorry for not checking it earlier, looks like the runner was missing
>> > pyroute:
>> >
>> > # python3 ./tools/testing/selftests/net/openvswitch/ovs-dpctl.py
>> > Need to install the python pyroute2 package >= 0.6.
>> >
>> > I guess run_cmd counter-productively eats the stderr output ? :(  
>> 
>> Awesome :)  I will add a patch to ovs-dpctl that will turn the
>> sys.exit(0) into sys.exit(1) - that way it should do the skip.
>> 
>> When I previously tested, I put an error in the `try` without reading
>> the except being specifically for a ModuleNotFound error.
>> 
>> I'll make sure pyroute2 isn't installed when I run it again.
>> 
>> Thanks for your help Jakub and Paolo!
>
> BTW I popped the v2 back into the queue, so the next run (in 20min)
> will tell us if that's the only thing we were missing 🤞️

:)  I'll wait to post the v3 then.  So far, the only change I have is:

--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -34,7 +34,7 @@ try:
 
 except ModuleNotFoundError:
 print("Need to install the python pyroute2 package >= 0.6.")
-sys.exit(0)
+sys.exit(1)
 
 
 OVS_DATAPATH_FAMILY = "ovs_datapath"
---

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 net-next 0/7] selftests: net: Switch pmtu.sh to use the internal ovs script.

2024-06-25 Thread Jakub Kicinski
On Tue, 25 Jun 2024 10:14:24 -0400 Aaron Conole wrote:
> > Sorry for not checking it earlier, looks like the runner was missing
> > pyroute:
> >
> > # python3 ./tools/testing/selftests/net/openvswitch/ovs-dpctl.py
> > Need to install the python pyroute2 package >= 0.6.
> >
> > I guess run_cmd counter-productively eats the stderr output ? :(  
> 
> Awesome :)  I will add a patch to ovs-dpctl that will turn the
> sys.exit(0) into sys.exit(1) - that way it should do the skip.
> 
> When I previously tested, I put an error in the `try` without reading
> the except being specifically for a ModuleNotFound error.
> 
> I'll make sure pyroute2 isn't installed when I run it again.
> 
> Thanks for your help Jakub and Paolo!

BTW I popped the v2 back into the queue, so the next run (in 20min)
will tell us if that's the only thing we were missing 🤞️
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 net-next 0/7] selftests: net: Switch pmtu.sh to use the internal ovs script.

2024-06-25 Thread Aaron Conole
Jakub Kicinski  writes:

> On Tue, 25 Jun 2024 09:20:29 -0400 Aaron Conole wrote:
>> > I'm still wondering if the issue is Kconfig-related (plus possibly bad
>> > interaction with vng). I don't see the OVS knob enabled in the self-
>> > tests config. If it's implied by some other knob, and ends-up being
>> > selected as a module, vng could stumble upon loading the module at
>> > runtime, especially on incremental build (at least I experience that
>> > problem locally). I'm not even sure if the KCI is building
>> > incrementally or not, so all the above could is quite a wild guess.
>> >
>> > In any case I think adding the explicit CONFIG_OPENVSWITCH=y the
>> > selftest config would make the scenario more well defined.  
>> 
>> That is in 7/7 - but there was a collision with a netfilter knob getting
>> turned on.  I can repost it as-is (just after rebasing) if you think
>> that is the only issue.
>
> Sorry for not checking it earlier, looks like the runner was missing
> pyroute:
>
> # python3 ./tools/testing/selftests/net/openvswitch/ovs-dpctl.py
> Need to install the python pyroute2 package >= 0.6.
>
> I guess run_cmd counter-productively eats the stderr output ? :(

Awesome :)  I will add a patch to ovs-dpctl that will turn the
sys.exit(0) into sys.exit(1) - that way it should do the skip.

When I previously tested, I put an error in the `try` without reading
the except being specifically for a ModuleNotFound error.

I'll make sure pyroute2 isn't installed when I run it again.

Thanks for your help Jakub and Paolo!

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 net-next 0/7] selftests: net: Switch pmtu.sh to use the internal ovs script.

2024-06-25 Thread Jakub Kicinski
On Tue, 25 Jun 2024 09:20:29 -0400 Aaron Conole wrote:
> > I'm still wondering if the issue is Kconfig-related (plus possibly bad
> > interaction with vng). I don't see the OVS knob enabled in the self-
> > tests config. If it's implied by some other knob, and ends-up being
> > selected as a module, vng could stumble upon loading the module at
> > runtime, especially on incremental build (at least I experience that
> > problem locally). I'm not even sure if the KCI is building
> > incrementally or not, so all the above could is quite a wild guess.
> >
> > In any case I think adding the explicit CONFIG_OPENVSWITCH=y the
> > selftest config would make the scenario more well defined.  
> 
> That is in 7/7 - but there was a collision with a netfilter knob getting
> turned on.  I can repost it as-is (just after rebasing) if you think
> that is the only issue.

Sorry for not checking it earlier, looks like the runner was missing
pyroute:

# python3 ./tools/testing/selftests/net/openvswitch/ovs-dpctl.py
Need to install the python pyroute2 package >= 0.6.

I guess run_cmd counter-productively eats the stderr output ? :(
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/2] Add GSO UDP Offloading feature to OVS Internal Port

2024-06-25 Thread Ilya Maximets
On 6/25/24 15:27, Mike Pattrick wrote:
> On Tue, Jun 25, 2024 at 4:30 AM echken  wrote:
>>
>> The OVS internal port does not support UDP fragmentation offloading,
>> resulting in large packets sent through the OVS internal port to OVS
>> being prematurely fragmented. This increases the total number of packets
>> processed in the path from the vport to the OVS bridge output port,
>> affecting transmission efficiency.
>>
>> Signed-off-by: echken 
>> ---
>>  net/openvswitch/vport-internal_dev.c | 3 ++-
>>  1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/net/openvswitch/vport-internal_dev.c 
>> b/net/openvswitch/vport-internal_dev.c
>> index 74c88a6baa43..c5a72c4dc6fd 100644
>> --- a/net/openvswitch/vport-internal_dev.c
>> +++ b/net/openvswitch/vport-internal_dev.c
>> @@ -110,7 +110,8 @@ static void do_setup(struct net_device *netdev)
>>
>> netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
>>NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
>> -  NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;
>> +  NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL |
>> +  NETIF_F_GSO_UDP | NETIF_F_GSO_UDP_L4;
> 
> I'll try testing this out, but preliminarily, NETIF_F_GSO_SOFTWARE
> already contains NETIF_F_GSO_UDP_L4.

And as far as my understanding goes, NETIF_F_GSO_UDP is
deprecated for all use, except for tuntap.  So, we probably
shouldn't add it.  UFO generally creates more issues than
it solves.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/2] Add GSO UDP Offloading feature to OVS Internal Port

2024-06-25 Thread Mike Pattrick
On Tue, Jun 25, 2024 at 4:30 AM echken  wrote:
>
> The OVS internal port does not support UDP fragmentation offloading,
> resulting in large packets sent through the OVS internal port to OVS
> being prematurely fragmented. This increases the total number of packets
> processed in the path from the vport to the OVS bridge output port,
> affecting transmission efficiency.
>
> Signed-off-by: echken 
> ---
>  net/openvswitch/vport-internal_dev.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/net/openvswitch/vport-internal_dev.c 
> b/net/openvswitch/vport-internal_dev.c
> index 74c88a6baa43..c5a72c4dc6fd 100644
> --- a/net/openvswitch/vport-internal_dev.c
> +++ b/net/openvswitch/vport-internal_dev.c
> @@ -110,7 +110,8 @@ static void do_setup(struct net_device *netdev)
>
> netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
>NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
> -  NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;
> +  NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL |
> +  NETIF_F_GSO_UDP | NETIF_F_GSO_UDP_L4;

I'll try testing this out, but preliminarily, NETIF_F_GSO_SOFTWARE
already contains NETIF_F_GSO_UDP_L4.


Thanks,
Mike

>
> netdev->vlan_features = netdev->features;
> netdev->hw_enc_features = netdev->features;
> --
> 2.34.1
>
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 net-next 0/7] selftests: net: Switch pmtu.sh to use the internal ovs script.

2024-06-25 Thread Aaron Conole
Paolo Abeni  writes:

> On Mon, 2024-06-24 at 12:53 -0400, Aaron Conole wrote:
>> Aaron Conole  writes:
>> 
>> > Jakub Kicinski  writes:
>> > 
>> > > On Thu, 20 Jun 2024 08:55:54 -0400 Aaron Conole wrote:
>> > > > This series enhances the ovs-dpctl utility to provide support for set()
>> > > > and tunnel() flow specifiers, better ipv6 handling support, and the
>> > > > ability to add tunnel vports, and LWT interfaces.  Finally, it modifies
>> > > > the pmtu.sh script to call the ovs-dpctl.py utility rather than the
>> > > > typical OVS userspace utilities.
>> > > 
>> > > Thanks for the work! 
>> > > 
>> > > Looks like the series no longer applies because of other changes
>> > > to the kernel config. Before it stopped applying we got some runs,
>> > > here's what I see:
>> > > 
>> > > https://netdev-3.bots.linux.dev/vmksft-net/results/648440/3-pmtu-sh/stdout
>> > > 
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv4, OVS vxlan4: PMTU exceptions [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv4, OVS vxlan4: PMTU exceptions - nexthop objects
>> > > [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv6, OVS vxlan4: PMTU exceptions [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv6, OVS vxlan4: PMTU exceptions - nexthop objects
>> > > [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv4, OVS vxlan6: PMTU exceptions [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv4, OVS vxlan6: PMTU exceptions - nexthop objects
>> > > [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv6, OVS vxlan6: PMTU exceptions [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv6, OVS vxlan6: PMTU exceptions - nexthop objects
>> > > [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv4, OVS geneve4: PMTU exceptions [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv4, OVS geneve4: PMTU exceptions - nexthop objects
>> > > [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv6, OVS geneve4: PMTU exceptions [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv6, OVS geneve4: PMTU exceptions - nexthop objects
>> > > [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv4, OVS geneve6: PMTU exceptions [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv4, OVS geneve6: PMTU exceptions - nexthop objects
>> > > [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > # TEST: IPv6, OVS geneve6: PMTU exceptions [FAIL]
>> > > # Cannot find device "ovs_br0"
>> > > 
>> > > Any idea why? Looks like kernel config did include OVS, perhaps we need
>> > > explicit modprobe now? I don't see any more details in the logs.
>> > 
>> > Strange.  I expected that the module should have automatically been
>> > loaded when attempting to communicate with the OVS genetlink family
>> > type.  At least, that is how it had been working previously.
>> > 
>> > I'll spend some time looking into it and resubmit a rebased version.
>> > Thanks, Jakub!
>> 
>> If the ovs module isn't available, then I see:
>> 
>> #   ovs_bridge not supported
>> # TEST: IPv4, OVS vxlan4: PMTU exceptions [SKIP]
>> 
>> But if it is available, I haven't been able to reproduce such ovs_br0
>> setup failure - things work.
>
> I'm still wondering if the issue is Kconfig-related (plus possibly bad
> interaction with vng). I don't see the OVS knob enabled in the self-
> tests config. If it's implied by some other knob, and ends-up being
> selected as a module, vng could stumble upon loading the module at
> runtime, especially on incremental build (at least I experience that
> problem locally). I'm not even sure if the KCI is building
> incrementally or not, so all the above could is quite a wild guess.
>
> In any case I think adding the explicit CONFIG_OPENVSWITCH=y the
> selftest config would make the scenario more well defined.

That is in 7/7 - but there was a collision with a netfilter knob getting
turned on.  I can repost it as-is (just after rebasing) if you think
that is the only issue.

> Cheers,
>
> Paolo

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 net-next 0/7] selftests: net: Switch pmtu.sh to use the internal ovs script.

2024-06-25 Thread Aaron Conole
Jakub Kicinski  writes:

> On Mon, 24 Jun 2024 12:53:45 -0400 Aaron Conole wrote:
>> Additionally, the "Cannot find device ..." text comes from an iproute2
>> utility output.  The only place we actually interact with that is via
>> the call at pmtu.sh:973:
>> 
>>  run_cmd ip link set ovs_br0 up
>> 
>> Maybe it is possible that the link isn't up (could some port memory
>> allocation or message be delaying it?) yet in the virtual environment.
>
> Depends on how the creation is implemented, normally device creation
> over netlink is synchronous. Just to be sure have you tried to repro
> with vng:
>
> https://github.com/linux-netdev/nipa/wiki/How-to-run-netdev-selftests-CI-style
>
> ? It could be the base OS difference, too, but that's harder to confirm.

Yes - that's the way I run it.  But I didn't try to use any of the
stress inducing options.  I'll work on it with that.

>> To confirm, is it possible to run in the constrained environment, but
>> put a 5s sleep or something?  I will add the following either as a
>> separate patch (ie 7/8), or I can fold it into 6/7 (and drop Stefano's
>> ACK waiting for another review):
>> 
>> 
>> wait_for_if() {
>>if ip link show "$2" >/dev/null 2>&1; then return 0; fi
>> 
>>for d in `seq 1 30`; do
>>   sleep 1
>>   if ip link show "$2" >/dev/null 2>&1; then return 0; fi
>>done
>>return 1
>> }
>> 
>> 
>>  setup_ovs_br_internal || setup_ovs_br_vswitchd || return $ksft_skip
>> +wait_for_if "ovs_br0"
>>  run_cmd ip link set ovs_br0 up
>> 
>> 
>> Does it make sense or does it seem like I am way off base?
>
> sleep 1 is a bit high (sleep does accept fractional numbers!)
> but otherwise worth trying, if you can't repro locally.

Ack.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [v4] odp-execute: Fix AVX checksum calculation.

2024-06-25 Thread Phelan, Michael
> -Original Message-
> From: Eelco Chaudron 
> Sent: Tuesday, June 25, 2024 11:37 AM
> To: Phelan, Michael 
> Cc: Finn, Emma ; Ilya Maximets
> ; ovs-dev@openvswitch.org; Van Haaren, Harry
> 
> Subject: Re: [v4] odp-execute: Fix AVX checksum calculation.
> 
> 
> 
> On 14 Jun 2024, at 10:17, Eelco Chaudron wrote:
> 
> >> Op 14 jun 2024 om 10:13 heeft Phelan, Michael
>  het volgende geschreven:
> >>
> >> 
> >>>
> >>> -Original Message-
> >>> From: Eelco Chaudron 
> >>> Sent: Thursday, June 13, 2024 12:45 PM
> >>> To: Finn, Emma ; Phelan, Michael
> >>> 
> >>> Cc: Ilya Maximets ; ovs-dev@openvswitch.org; Van
> >>> Haaren, Harry 
> >>> Subject: Re: [v4] odp-execute: Fix AVX checksum calculation.
> >>>
> >>>
> >>>

> 
> > In addition, any idea why these tests do not fail in Intel’s
> > upstream unit
> >>> tests?
> 
> > Do they use different hardware? Copied in Michael, maybe he knows
> > more
> 
> > about the setup/tests.
> 
> >
> 
> > //Eelco
> 
> >
> 
> 
> 
>  I have investigated both unit test failures.
> 
>  1064: ofproto - implicit mask of ipv6 proto with HOPOPT field
>  FAILED
> >>> (ofproto.at:6668)
> 
>  For this one, the AVX implementation didn't handle setting the IPv6
>  traffic
> >>> class field.
> 
> 
> 
>  2615: nsh - triangle PTAP bridge setup with NSH over vxlan-gpe
>  FAILED
> >>> (nsh.at:816)
> 
>  For this one, the AVX implementation was missing a check for IPv4
>  checksum
> >>> offload flag.
> 
>  I have 2 separate patches to fix these issues and will send shortly.
> >>>
> >>> Thanks Emma, I’ll review them next week, as I’m out at a conference
> >>> (and a lot of internal meetings).
> >>>
>  As for the Intel unit test CI (ovsrobot/intel-ovs-compilation),
>  make check is
> >>> never run with
> 
>  any of the AVX autovalidators enabled. Table below shows the 4
>  builds and
> >>> the unit tests ran
> 
>  after each build.
> >>>
> >>> I guess it would be good to add the “make check” to the runs below.
> >>> Michael would you be able to set this up?
> >> Hi Eelco,
> >> Yes, I can add make check to all of the runs on Intel CI. I will set that 
> >> up now.
> >
> > Thanks Michael for adding this. You might want to wait until the patches are
> in as it will fail without them.
> 
> Michael, the fixes are included in the main branch, so please go ahead and add
> the extra test cases.

Thanks Eelco, I will add the extra tests now.

Kind regards,
Michael.

> 
> Cheers,
> 
> Eelco
> 
> 


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v4 04/10] net: psample: allow using rate as probability

2024-06-25 Thread Ilya Maximets
On 6/25/24 13:17, Paolo Abeni wrote:
> On Fri, 2024-06-21 at 12:10 +0200, Adrian Moreno wrote:
>> diff --git a/include/uapi/linux/tc_act/tc_sample.h 
>> b/include/uapi/linux/tc_act/tc_sample.h
>> index fee1bcc20793..7ee0735e7b38 100644
>> --- a/include/uapi/linux/tc_act/tc_sample.h
>> +++ b/include/uapi/linux/tc_act/tc_sample.h
>> @@ -18,6 +18,7 @@ enum {
>>  TCA_SAMPLE_TRUNC_SIZE,
>>  TCA_SAMPLE_PSAMPLE_GROUP,
>>  TCA_SAMPLE_PAD,
>> +TCA_SAMPLE_PROBABILITY,
>>  __TCA_SAMPLE_MAX
>>  };
>>  #define TCA_SAMPLE_MAX (__TCA_SAMPLE_MAX - 1)
> 
> I believe Ilya's comment on v3 is correct, this chunk looks unrelated
> and unneeded. I guess you can drop it? Or am I missing something?

Oops, somehow I missed that there is a v4 and replied to v3.
So, my bad.  But yes, the comment still applies here.

Best regards, Ilya Maximets.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v4 04/10] net: psample: allow using rate as probability

2024-06-25 Thread Paolo Abeni
On Fri, 2024-06-21 at 12:10 +0200, Adrian Moreno wrote:
> diff --git a/include/uapi/linux/tc_act/tc_sample.h 
> b/include/uapi/linux/tc_act/tc_sample.h
> index fee1bcc20793..7ee0735e7b38 100644
> --- a/include/uapi/linux/tc_act/tc_sample.h
> +++ b/include/uapi/linux/tc_act/tc_sample.h
> @@ -18,6 +18,7 @@ enum {
>   TCA_SAMPLE_TRUNC_SIZE,
>   TCA_SAMPLE_PSAMPLE_GROUP,
>   TCA_SAMPLE_PAD,
> + TCA_SAMPLE_PROBABILITY,
>   __TCA_SAMPLE_MAX
>  };
>  #define TCA_SAMPLE_MAX (__TCA_SAMPLE_MAX - 1)

I believe Ilya's comment on v3 is correct, this chunk looks unrelated
and unneeded. I guess you can drop it? Or am I missing something?

Thanks,

Paolo

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2 2/2] ci: Move DPDK build into container.

2024-06-25 Thread Dumitru Ceara
On 6/24/24 16:33, Ales Musil wrote:
> The DPDK was built as extra step in the CI, however it is useful to
> have it inside the container prepared. This also helps with
> reproduction of failures with DPDK by having the exact version inside
> the container already.
> 
> Also bump the Ubuntu version for container builds to 24.04 to get
> Podman 4. This is required to avoid the tar permissions error:
> 
> tar: dpdk-23.11/.ci: Cannot change mode to rwxrwxr-x: Operation not permitted
> 
> Signed-off-by: Ales Musil 
> Acked-by: Eelco Chaudron 
> ---
> v2: Bump the meson version.
> Add ack from Eelco.

Thanks, Ales and Eelco!  Applied to main.

Regards,
Dumitru

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2 1/2] ci: Move common build steps into script.

2024-06-25 Thread Dumitru Ceara
On 6/24/24 16:33, Ales Musil wrote:
> Move common preparation steps into script that can be invoked by both
> container builds. This will ensure that any update will be reflected
> in both containers, and it reduces the duplication between both
> containers. At the same time use the --user argument which avoids
> the error below and allows pip to upgrade itself:
> 
> ERROR: Cannot uninstall pip 24.0, RECORD file not found.
> Hint: The package was installed by debian.
> 
> Signed-off-by: Ales Musil 
> Acked-by: Eelco Chaudron 
> ---
> v2: Use --user for pip update.
> Add ack from Eelco.
> ---
>  utilities/automake.mk  |  1 +
>  utilities/containers/fedora/Dockerfile | 35 
>  utilities/containers/prepare.sh| 37 ++
>  utilities/containers/ubuntu/Dockerfile | 37 +-
>  4 files changed, 49 insertions(+), 61 deletions(-)
>  create mode 100755 utilities/containers/prepare.sh
> 
> diff --git a/utilities/automake.mk b/utilities/automake.mk
> index de4f6efb5..03e9096fa 100644
> --- a/utilities/automake.mk
> +++ b/utilities/automake.mk
> @@ -42,6 +42,7 @@ EXTRA_DIST += \
>  utilities/containers/Makefile \
>  utilities/containers/openbfdd.patch \
>  utilities/containers/py-requirements.txt \
> +utilities/containers/prepare.sh \
>  utilities/containers/fedora/Dockerfile \
>  utilities/containers/ubuntu/Dockerfile \
>  utilities/docker/Makefile \
> diff --git a/utilities/containers/fedora/Dockerfile 
> b/utilities/containers/fedora/Dockerfile
> index 019e9f138..f28c00b5d 100755
> --- a/utilities/containers/fedora/Dockerfile
> +++ b/utilities/containers/fedora/Dockerfile
> @@ -45,41 +45,16 @@ RUN dnf -y update \
>  && \
>  dnf clean all
>  
> -# Compile sparse from source
> -WORKDIR /workspace/sparse
> +ENV PATH "${PATH}:${HOME}/bin:${HOME}/.local/bin"

I removed this one, it's not needed as we don't install requirements as
--user.

>  
> -RUN git clone git://git.kernel.org/pub/scm/devel/sparse/sparse.git \
> -/workspace/sparse \
> -&& \
> -make -j4 PREFIX=/usr HAVE_LLVM= HAVE_SQLITE= install
> -
> -# Compile OpenBFDD from source
> -WORKDIR /workspace/OpenBFDD
> +WORKDIR /workspace
>  
>  COPY $CONTAINERS_PATH/openbfdd.patch /tmp/openbfdd.patch
>  
> -RUN git clone https://github.com/dyninc/OpenBFDD.git \
> -/workspace/OpenBFDD \
> -&& \
> -git apply /tmp/openbfdd.patch \
> -&& \
> -./autogen.sh \
> -&& \
> -./configure --enable-silent-rules \
> -&& \
> -make \
> -&& \
> -make install
> -
> -WORKDIR /workspace
> -
>  COPY $CONTAINERS_PATH/py-requirements.txt /tmp/py-requirements.txt
>  
> -# Update and install pip dependencies
> -RUN python3 -m pip install --upgrade pip \
> -&& \
> -python3 -m pip install wheel \
> -&& \
> -python3 -m pip install -r /tmp/py-requirements.txt
> +COPY $CONTAINERS_PATH/prepare.sh /tmp/prepare.sh
> +
> +RUN /tmp/prepare.sh
>  
>  CMD ["/usr/sbin/init"]
> diff --git a/utilities/containers/prepare.sh b/utilities/containers/prepare.sh
> new file mode 100755
> index 0..b3baa4345
> --- /dev/null
> +++ b/utilities/containers/prepare.sh
> @@ -0,0 +1,37 @@
> +#!/bin/bash -xe
> +
> +function compile_sparse()
> +{
> +git clone git://git.kernel.org/pub/scm/devel/sparse/sparse.git \
> +/workspace/sparse
> +
> +pushd sparse
> +make -j4 PREFIX=/usr HAVE_LLVM= HAVE_SQLITE= install
> +popd
> +}
> +
> +function compile_openbfdd()
> +{
> +git clone https://github.com/dyninc/OpenBFDD.git \
> +/workspace/OpenBFDD
> +
> +pushd OpenBFDD
> +git apply /tmp/openbfdd.patch
> +./autogen.sh
> +./configure --enable-silent-rules
> +make
> +make install
> +popd
> +}
> +
> +function install_python_dep()
> +{
> +# The --user should be removed once pip can be upgraded on Ubuntu.
> +python3 -m pip install --user --upgrade pip
> +python3 -m pip install wheel
> +python3 -m pip install -r /tmp/py-requirements.txt
> +}
> +
> +compile_sparse
> +compile_openbfdd
> +install_python_dep
> diff --git a/utilities/containers/ubuntu/Dockerfile 
> b/utilities/containers/ubuntu/Dockerfile
> index ce7ce16c6..49ba861ac 100755
> --- a/utilities/containers/ubuntu/Dockerfile
> +++ b/utilities/containers/ubuntu/Dockerfile
> @@ -45,48 +45,23 @@ RUN apt update -y \
>  && \
>  apt clean
>  
> -# Compile sparse from source
> -WORKDIR /workspace/sparse
> +ENV PATH "${PATH}:${HOME}/bin:${HOME}/.local/bin"

This one too.

>  
> -RUN git clone git://git.kernel.org/pub/scm/devel/sparse/sparse.git \
> -/workspace/sparse \
> -&& \
> -make -j4 PREFIX=/usr HAVE_LLVM= HAVE_SQLITE= install
> -
> -# Compile OpenBFDD from source
> -WORKDIR /workspace/OpenBFDD
> +WORKDIR /workspace
>  
>  COPY $CONTAINERS_PATH/openbfdd.patch /tmp/openbfdd.patch
>  
> -RUN git clone https://github.com/dyninc/OpenBFDD.git \
> -/workspace/OpenBFDD \
> -&& \
> -git

Re: [ovs-dev] [v4] odp-execute: Fix AVX checksum calculation.

2024-06-25 Thread Eelco Chaudron


On 14 Jun 2024, at 10:17, Eelco Chaudron wrote:

>> Op 14 jun 2024 om 10:13 heeft Phelan, Michael  het 
>> volgende geschreven:
>>
>> 
>>>
>>> -Original Message-
>>> From: Eelco Chaudron 
>>> Sent: Thursday, June 13, 2024 12:45 PM
>>> To: Finn, Emma ; Phelan, Michael
>>> 
>>> Cc: Ilya Maximets ; ovs-dev@openvswitch.org; Van
>>> Haaren, Harry 
>>> Subject: Re: [v4] odp-execute: Fix AVX checksum calculation.
>>>
>>>
>>>
>>> On 12 Jun 2024, at 12:42, Finn, Emma wrote:
>>>
> -Original Message-

> From: Eelco Chaudron 

> Sent: Thursday, May 30, 2024 2:44 PM

> To: Finn, Emma ; Phelan, Michael

> 

> Cc: Ilya Maximets ; ovs-dev@openvswitch.org; Van

> Haaren, Harry 

> Subject: Re: [v4] odp-execute: Fix AVX checksum calculation.

>

>

>

>> On 30 May 2024, at 15:28, Eelco Chaudron wrote:

>

>>> On 30 May 2024, at 14:46, Finn, Emma wrote:

>>

 -Original Message-

 From: Eelco Chaudron
>>> mailto:echau...@redhat.com>>

 Sent: Wednesday, May 29, 2024 3:23 PM

 To: Finn, Emma
>>> mailto:emma.f...@intel.com>>

 Cc: Ilya Maximets mailto:i.maxim...@ovn.org>>
>>> ; ovs-dev@openvswitch.org ; Van

 Haaren, Harry
>>> mailto:harry.van.haa...@intel.com>>

 Subject: Re: [v4] odp-execute: Fix AVX checksum calculation.







> On 29 May 2024, at 14:51, Ilya Maximets wrote:



>> On 5/29/24 11:01, Eelco Chaudron wrote:

>>

>>

>>> On 28 May 2024, at 16:49, Ilya Maximets wrote:

>>

 On 5/28/24 14:36, Eelco Chaudron wrote:





> On 24 May 2024, at 11:20, Emma Finn wrote:



> The AVX implementation for calcualting checksums was not

> handling carry-over addition correctly in some cases.

> This patch adds an additional shuffle to add 16-bit padding to

> the final part of the calculation to handle such cases. This

> commit also adds a unit test to check the checksum carry-bits

> issue with actions autovalidator enabled.



 Hi Emma,



 I made the small changes, and did some more testing before I
>>> committed.

 However, there are more failures in the same area with or without your

> patch.

 I’m holding of committing this patch as it might be related.



>>>

>>> Hi Eelco,

>>>

>>> These tests are unrelated to this patch so I think we should go ahead 
>>> and

> merge this.

>>

>> Ok, I’ll go ahead and apply it later today.

>>

 The failing tests are (on latest main branch):



 1064: ofproto - implicit mask of ipv6 proto with HOPOPT field FAILED

 (ofproto.at:6668)

>>>

>>> I investigated this test and the SIMD implementation isn't handling 
>>> traffic

> class field correctly. I'm on PTO for the next week but I will make a fix 
> for this

> once I'm back.

>>

>> Thanks!

>>

 2615: nsh - triangle PTAP bridge setup with NSH over vxlan-gpe

 FAILED

 (nsh.at:816)



>>> For this one it looks like the scalar is expecting an ipv4 checksum of 
>>> 0x000

> and the SIMD implementation has calculated an ipv4 checksum of 0xDF77.

>>> This is more a logic question whether or not the checksum should be

> calculated for this? Thoughts?

>>

>> I need to look at the tests, but if it’s a UDP packet, and the original 
>> UDP

> checksum was 0, it should stay zero.

>

>

> In addition, any idea why these tests do not fail in Intel’s upstream unit
>>> tests?

> Do they use different hardware? Copied in Michael, maybe he knows more

> about the setup/tests.

>

> //Eelco

>



 I have investigated both unit test failures.

 1064: ofproto - implicit mask of ipv6 proto with HOPOPT field FAILED
>>> (ofproto.at:6668)

 For this one, the AVX implementation didn't handle setting the IPv6 traffic
>>> class field.



 2615: nsh - triangle PTAP bridge setup with NSH over vxlan-gpe FAILED
>>> (nsh.at:816)

 For this one, the AVX implementation was missing a check for IPv4 checksum
>>> offload flag.

 I have 2 separate patches to fix these issues and will send shortly.
>>>
>>> Thanks Emma, I’ll review them next week, as I’m out at a conference (and a 
>>> lot
>>> of internal meetings).
>

Re: [ovs-dev] [v2] odp-execute: Check IPv4 checksum offload flag in AVX.

2024-06-25 Thread Eelco Chaudron



On 17 Jun 2024, at 16:08, Emma Finn wrote:

> The AVX implementation for IPv4 action did not check whether
> the IPv4 checksum offload flag has been set and was incorrectly
> calculating checksums in software. Adding a check to skip AVX
> checksum calculation when offload flags are set.
>
> Signed-off-by: Emma Finn 
> Reported-by: Eelco Chaudron 

Thanks Emma and Mike, the patch was committed to main and the 3.3 branch.

Cheers,

Eelco

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] odp-execute: Set IPv6 traffic class in AVX implementation.

2024-06-25 Thread Eelco Chaudron



On 12 Jun 2024, at 12:44, Emma Finn wrote:

> The AVX implementation for the IPv6 action did not set
> traffic class field. Adding support for this field to
> the AVX implementation.
>
> Signed-off-by: Emma Finn 
> Reported-by: Eelco Chaudron 

Thanks for the patch Emma, it has been applied to main and down to 3.2.

Cheers,

Eelco

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 net-next 0/7] selftests: net: Switch pmtu.sh to use the internal ovs script.

2024-06-25 Thread Stefano Brivio
On Mon, 24 Jun 2024 15:30:23 -0700
Jakub Kicinski  wrote:

> On Mon, 24 Jun 2024 12:53:45 -0400 Aaron Conole wrote:
> > Additionally, the "Cannot find device ..." text comes from an iproute2
> > utility output.  The only place we actually interact with that is via
> > the call at pmtu.sh:973:
> > 
> > run_cmd ip link set ovs_br0 up
> > 
> > Maybe it is possible that the link isn't up (could some port memory
> > allocation or message be delaying it?) yet in the virtual environment.  
> 
> Depends on how the creation is implemented, normally device creation
> over netlink is synchronous.

It also looks like pyroute2 would keep everything synchronous (unless
you call NetlinkSocket.bind(async_cache=True))... weird.

> Just to be sure have you tried to repro with vng:
> 
> https://github.com/linux-netdev/nipa/wiki/How-to-run-netdev-selftests-CI-style
> 
> ? It could be the base OS difference, too, but that's harder to confirm.
> 
> > To confirm, is it possible to run in the constrained environment, but
> > put a 5s sleep or something?  I will add the following either as a
> > separate patch (ie 7/8), or I can fold it into 6/7 (and drop Stefano's
> > ACK waiting for another review):
> > 
> > 
> > wait_for_if() {
> >if ip link show "$2" >/dev/null 2>&1; then return 0; fi
> > 
> >for d in `seq 1 30`; do
> >   sleep 1
> >   if ip link show "$2" >/dev/null 2>&1; then return 0; fi
> >done
> >return 1
> > }
> > 
> > 
> > setup_ovs_br_internal || setup_ovs_br_vswitchd || return $ksft_skip
> > +   wait_for_if "ovs_br0"
> > run_cmd ip link set ovs_br0 up
> > 
> > 
> > Does it make sense or does it seem like I am way off base?  
> 
> sleep 1 is a bit high (sleep does accept fractional numbers!)

This script was originally (and mostly is) all nice and POSIX (where
sleep doesn't take fractional numbers), so, if you don't mind, I'd
rather prefer "sleep 0.1 || sleep 1". :)

-- 
Stefano

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/2] Add GSO UDP Offloading feature to OVS Internal Port

2024-06-25 Thread echken
The OVS internal port does not support UDP fragmentation offloading,
resulting in large packets sent through the OVS internal port to OVS
being prematurely fragmented. This increases the total number of packets
processed in the path from the vport to the OVS bridge output port,
affecting transmission efficiency.

Signed-off-by: echken 
---
 net/openvswitch/vport-internal_dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/openvswitch/vport-internal_dev.c 
b/net/openvswitch/vport-internal_dev.c
index 74c88a6baa43..c5a72c4dc6fd 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -110,7 +110,8 @@ static void do_setup(struct net_device *netdev)
 
netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
   NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
-  NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;
+  NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL |
+  NETIF_F_GSO_UDP | NETIF_F_GSO_UDP_L4;
 
netdev->vlan_features = netdev->features;
netdev->hw_enc_features = netdev->features;
-- 
2.34.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 4/4] ovsdb: Optimize monitor update by directly serializing data into ds.

2024-06-25 Thread Grigorii Nazarov
Currently serialization is performed by first converting
the internal data representation into JSON objects, followed by
serializing these objects by jsonrpc. This process results in lots of
allocations for these intermediate objects. Consequently, this
not only increases peak memory consumption, but also
demands significantly more CPU work.

By forming row-update JSONs directly in `ds`, which is then used
to create 'serialized object' JSONs, the overall speed increased
by a factor of 2.3.

A local benchmark was run on a proprietary Southbound backup.
Both versions, before and after applying the patch, were measured.
For each version, there were two runs with 10 parallel clients,
and two runs with 30 parallel clients. CPU time was recorded
after startup (before clients started running) and after
all clients received all updates. Clients were essentially running
`ovsdb-client monitor-cond unix:pipe OVN_Southbound
'[[\"actions\",\"!=\",\"sdfdfsd\"]]' Logical_Flow`.
Similar results were obtained with other requests
that required a significant amount of data transfer.
The backup size is about 600 MB. Results are measured in seconds.

Before  After
Baseline x10:   9.53108.54
Baseline x10:   9.62108.67
Baseline x30:   9.69307.04
Baseline x30:   9.65303.32
Patch x10:  9.6752.57
Patch x10:  9.5753.12
Patch x30:  9.53136.33
Patch x30:  9.63135.88

Signed-off-by: Grigorii Nazarov 
---
 include/openvswitch/json.h |   1 +
 lib/json.c |   8 ++
 lib/ovsdb-data.c   | 105 +++
 lib/ovsdb-data.h   |   3 +
 ovsdb/monitor.c| 210 -
 5 files changed, 254 insertions(+), 73 deletions(-)

diff --git a/include/openvswitch/json.h b/include/openvswitch/json.h
index 555440760..80b9479c7 100644
--- a/include/openvswitch/json.h
+++ b/include/openvswitch/json.h
@@ -81,6 +81,7 @@ struct json *json_boolean_create(bool);
 struct json *json_string_create(const char *);
 struct json *json_string_create_nocopy(char *);
 struct json *json_serialized_object_create(const struct json *);
+struct json *json_serialized_object_create_from_string(const char *);
 struct json *json_integer_create(long long int);
 struct json *json_real_create(double);
 
diff --git a/lib/json.c b/lib/json.c
index d40e93857..66b1f571f 100644
--- a/lib/json.c
+++ b/lib/json.c
@@ -199,6 +199,14 @@ json_serialized_object_create(const struct json *src)
 return json;
 }
 
+struct json *
+json_serialized_object_create_from_string(const char *s)
+{
+struct json *json = json_create(JSON_SERIALIZED_OBJECT);
+json->string = xstrdup(s);
+return json;
+}
+
 struct json *
 json_serialized_object_create_with_yield(const struct json *src)
 {
diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
index defb048d7..f32b7975a 100644
--- a/lib/ovsdb-data.c
+++ b/lib/ovsdb-data.c
@@ -492,6 +492,45 @@ ovsdb_atom_to_json__(const union ovsdb_atom *atom, enum 
ovsdb_atomic_type type,
 }
 }
 
+/* Serializes 'atom', of the specified 'type', into 'ds' in JSON format.
+ *
+ * Refer to RFC 7047 for the format of the JSON that this function produces. */
+static void
+ovsdb_atom_to_json_ds(const union ovsdb_atom *atom,
+  enum ovsdb_atomic_type type, struct ds *ds)
+{
+switch (type) {
+case OVSDB_TYPE_VOID:
+OVS_NOT_REACHED();
+
+case OVSDB_TYPE_INTEGER:
+ds_put_format(ds, "%lld", (long long) atom->integer);
+return;
+
+case OVSDB_TYPE_REAL:
+ds_put_format(ds, "%.*g", DBL_DIG, atom->real);
+return;
+
+case OVSDB_TYPE_BOOLEAN:
+ds_put_cstr(ds, atom->boolean ? "true" : "false");
+return;
+
+case OVSDB_TYPE_STRING:
+json_to_ds(atom->s, JSSF_SORT, ds);
+return;
+
+case OVSDB_TYPE_UUID:
+ds_put_cstr(ds, "[\"uuid\",\"");
+ds_put_format(ds, UUID_FMT, UUID_ARGS(&atom->uuid));
+ds_put_cstr(ds, "\"]");
+return;
+
+case OVSDB_N_TYPES:
+default:
+OVS_NOT_REACHED();
+}
+}
+
 struct json *
 ovsdb_atom_to_json(const union ovsdb_atom *atom, enum ovsdb_atomic_type type)
 {
@@ -1445,6 +1484,23 @@ ovsdb_base_to_json(const union ovsdb_atom *atom,
 }
 }
 
+static void
+ovsdb_base_to_json_ds(const union ovsdb_atom *atom,
+  const struct ovsdb_base_type *base,
+  bool use_row_names,
+  struct ds *ds)
+{
+if (!use_row_names
+|| base->type != OVSDB_TYPE_UUID
+|| !base->uuid.refTableName) {
+ovsdb_atom_to_json_ds(atom, base->type, ds);
+} else {
+ds_put_cstr(ds, "[\"named-uuid\",\"");
+ds_put_format(ds, UUID_ROW_FMT, UUID_ARGS(&atom->uuid));
+ds_put_cstr(ds, "\"]");
+}
+}
+
 static struct json *
 ovsdb_datum_to_json__(const struct ovsdb_datum *datum,
   const struct ovsdb_type *type,
@@ -1482,6 +1538,47 @@ ovsdb_datum_to_json__(const struct ovsdb_d

Re: [ovs-dev] [PATCH 1/4] .gitignore: add clangd configuration file

2024-06-25 Thread Grigorii Nazarov
On Thursday, June 20, 2024 9:52:07 PM GMT+3 Aaron Conole wrote:
> Why is this file existing?  Maybe it would be better to generate the
> compile_commands.json in automake?  Or generate it via the makefile?
I'm not checking in the file. It's local configuration file, used by clangd, 
which is clang-based language server used by IDEs like Visual Studio Code. It 
shouldn't be generated by project files and strictly speaking is not part of a 
project in the first place. I found some other local files being mentioned in 
.gitignore, and thus considered adding this one. In general it's a common 
practice to gitignore files like that. I can work it out differently if it's 
not 
to be here.

> >  .gitignore | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/.gitignore b/.gitignore
> > index 26ed8d3d0..3c7250159 100644
> > --- a/.gitignore
> > +++ b/.gitignore
> > @@ -79,3 +79,4 @@ testsuite.tmp.orig
> > 
> >  /Documentation/_build
> >  /.venv
> >  /cxx-check
> > 
> > +/compile_flags.txt




___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 4/4] ovsdb: Optimize monitor update by directly serializing data into ds.

2024-06-25 Thread 0-day Robot
Bleep bloop.  Greetings Grigorii Nazarov, I am a robot and I have tried out 
your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Inappropriate bracing around statement
#332 FILE: ovsdb/monitor.c:1010:
if (!c->monitored || !(type & c->select))  {

ERROR: Inappropriate bracing around statement
#372 FILE: ovsdb/monitor.c:1041:
if (!c->monitored || !(type & c->select))  {

Lines checked: 595, Warnings: 0, Errors: 2


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 3/4] lib/json: simplify string serialization code

2024-06-25 Thread Grigorii Nazarov
On Thursday, June 20, 2024 9:58:41 PM GMT+3 Aaron Conole wrote:
> There's only one user left in tree.  Maybe it makes sense to remove
> this?  Or at least mark it deprecated or something?  It is used quite a
> bit in OVN project.  It doesn't seem like it is worth the overhead of
> the extra function call in OVS, though.  At the very least, maybe it can
> become an inline function in the json.h file.
> 
Refactoring wasn't primer focus of my work, only optimization. Fourth patch in 
the series adds one more usage. From the perspective of code organization, 
exposing json_serialize_string might be undesired. From the perspective of 
further optimization, the very serialization process is much more expensive, 
than that call. Moreover, there are other places that are way more expensive, 
UUID serialization is one example. I considered this to be a low hanging 
improvement that I can include.

All that said, I will happily do some small refactoring regarding this 
function as to make code the clearer. I'm just not sure myself which way is 
preferable here. Should I make it inline in header or replace with 
json_serialize_string?

> >  lib/json.c | 11 ---
> >  1 file changed, 4 insertions(+), 7 deletions(-)
> > 
> > diff --git a/lib/json.c b/lib/json.c
> > index 001f6e6ab..d40e93857 100644
> > --- a/lib/json.c
> > +++ b/lib/json.c
> > @@ -127,7 +127,9 @@ static void json_parser_input(struct json_parser *,
> > struct json_token *);> 
> >  static void json_error(struct json_parser *p, const char *format, ...)
> >  
> >  OVS_PRINTF_FORMAT(2, 3);
> > 
> > -
> > +
> > +static void json_serialize_string(const char *, struct ds *);
> > +
> > 
> >  const char *
> >  json_type_to_string(enum json_type type)
> >  {
> > 
> > @@ -987,11 +989,7 @@ exit:
> >  void
> >  json_string_escape(const char *in, struct ds *out)
> >  {
> > 
> > -struct json json = {
> > -.type = JSON_STRING,
> > -.string = CONST_CAST(char *, in),
> > -};
> > -json_to_ds(&json, 0, out);
> > +json_serialize_string(in, out);
> > 
> >  }
> >  
> >  static void
> > 
> > @@ -1572,7 +1570,6 @@ static void json_serialize_object(const struct shash
> > *object,> 
> >struct json_serializer *);
> >  
> >  static void json_serialize_array(const struct json_array *,
> >  
> >   struct json_serializer *);
> > 
> > -static void json_serialize_string(const char *, struct ds *);
> > 
> >  /* Converts 'json' to a string in JSON format, encoded in UTF-8, and
> >  returns>  
> >   * that string.  The caller is responsible for freeing the returned
> >   string,




___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 net-next 0/7] selftests: net: Switch pmtu.sh to use the internal ovs script.

2024-06-25 Thread Paolo Abeni
On Mon, 2024-06-24 at 12:53 -0400, Aaron Conole wrote:
> Aaron Conole  writes:
> 
> > Jakub Kicinski  writes:
> > 
> > > On Thu, 20 Jun 2024 08:55:54 -0400 Aaron Conole wrote:
> > > > This series enhances the ovs-dpctl utility to provide support for set()
> > > > and tunnel() flow specifiers, better ipv6 handling support, and the
> > > > ability to add tunnel vports, and LWT interfaces.  Finally, it modifies
> > > > the pmtu.sh script to call the ovs-dpctl.py utility rather than the
> > > > typical OVS userspace utilities.
> > > 
> > > Thanks for the work! 
> > > 
> > > Looks like the series no longer applies because of other changes
> > > to the kernel config. Before it stopped applying we got some runs,
> > > here's what I see:
> > > 
> > > https://netdev-3.bots.linux.dev/vmksft-net/results/648440/3-pmtu-sh/stdout
> > > 
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv4, OVS vxlan4: PMTU exceptions 
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv4, OVS vxlan4: PMTU exceptions - nexthop objects   
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv6, OVS vxlan4: PMTU exceptions 
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv6, OVS vxlan4: PMTU exceptions - nexthop objects   
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv4, OVS vxlan6: PMTU exceptions 
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv4, OVS vxlan6: PMTU exceptions - nexthop objects   
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv6, OVS vxlan6: PMTU exceptions 
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv6, OVS vxlan6: PMTU exceptions - nexthop objects   
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv4, OVS geneve4: PMTU exceptions
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv4, OVS geneve4: PMTU exceptions - nexthop objects  
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv6, OVS geneve4: PMTU exceptions
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv6, OVS geneve4: PMTU exceptions - nexthop objects  
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv4, OVS geneve6: PMTU exceptions
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv4, OVS geneve6: PMTU exceptions - nexthop objects  
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > # TEST: IPv6, OVS geneve6: PMTU exceptions
> > > [FAIL]
> > > # Cannot find device "ovs_br0"
> > > 
> > > Any idea why? Looks like kernel config did include OVS, perhaps we need
> > > explicit modprobe now? I don't see any more details in the logs.
> > 
> > Strange.  I expected that the module should have automatically been
> > loaded when attempting to communicate with the OVS genetlink family
> > type.  At least, that is how it had been working previously.
> > 
> > I'll spend some time looking into it and resubmit a rebased version.
> > Thanks, Jakub!
> 
> If the ovs module isn't available, then I see:
> 
> #   ovs_bridge not supported
> # TEST: IPv4, OVS vxlan4: PMTU exceptions [SKIP]
> 
> But if it is available, I haven't been able to reproduce such ovs_br0
> setup failure - things work.

I'm still wondering if the issue is Kconfig-related (plus possibly bad
interaction with vng). I don't see the OVS knob enabled in the self-
tests config. If it's implied by some other knob, and ends-up being
selected as a module, vng could stumble upon loading the module at
runtime, especially on incremental build (at least I experience that
problem locally). I'm not even sure if the KCI is building
incrementally or not, so all the above could is quite a wild guess.

In any case I think adding the explicit CONFIG_OPENVSWITCH=y the
selftest config would make the scenario more well defined.

Cheers,

Paolo

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 4/4] ovsdb: Optimize monitor update by directly serializing data into ds.

2024-06-25 Thread Grigorii Nazarov
Currently serialization is performed by first converting
the internal data representation into JSON objects, followed by
serializing these objects by jsonrpc. This process results in lots of
allocations for these intermediate objects. Consequently, this
not only increases peak memory consumption, but also
demands significantly more CPU work.

By forming row-update JSONs directly in `ds`, which is then used
to create 'serialized object' JSONs, the overall speed increased
by a factor of 2.3.

A local benchmark was run on a proprietary Southbound backup.
Both versions, before and after applying the patch, were measured.
For each version, there were two runs with 10 parallel clients,
and two runs with 30 parallel clients. CPU time was recorded
after startup (before clients started running) and after
all clients received all updates. Clients were essentially running
`ovsdb-client monitor-cond unix:pipe OVN_Southbound
'[[\"actions\",\"!=\",\"sdfdfsd\"]]' Logical_Flow`.
Similar results were obtained with other requests
that required a significant amount of data transfer.
The backup size is about 600 MB. Results are measured in seconds.

Before  After
Baseline x10:   9.53108.54
Baseline x10:   9.62108.67
Baseline x30:   9.69307.04
Baseline x30:   9.65303.32
Patch x10:  9.6752.57
Patch x10:  9.5753.12
Patch x30:  9.53136.33
Patch x30:  9.63135.88

Signed-off-by: Grigorii Nazarov 
---
 include/openvswitch/json.h |   1 +
 lib/json.c |   8 ++
 lib/ovsdb-data.c   | 105 +++
 lib/ovsdb-data.h   |   3 +
 ovsdb/monitor.c| 210 -
 5 files changed, 254 insertions(+), 73 deletions(-)

diff --git a/include/openvswitch/json.h b/include/openvswitch/json.h
index 555440760..80b9479c7 100644
--- a/include/openvswitch/json.h
+++ b/include/openvswitch/json.h
@@ -81,6 +81,7 @@ struct json *json_boolean_create(bool);
 struct json *json_string_create(const char *);
 struct json *json_string_create_nocopy(char *);
 struct json *json_serialized_object_create(const struct json *);
+struct json *json_serialized_object_create_from_string(const char *);
 struct json *json_integer_create(long long int);
 struct json *json_real_create(double);
 
diff --git a/lib/json.c b/lib/json.c
index d40e93857..66b1f571f 100644
--- a/lib/json.c
+++ b/lib/json.c
@@ -199,6 +199,14 @@ json_serialized_object_create(const struct json *src)
 return json;
 }
 
+struct json *
+json_serialized_object_create_from_string(const char *s)
+{
+struct json *json = json_create(JSON_SERIALIZED_OBJECT);
+json->string = xstrdup(s);
+return json;
+}
+
 struct json *
 json_serialized_object_create_with_yield(const struct json *src)
 {
diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
index defb048d7..f32b7975a 100644
--- a/lib/ovsdb-data.c
+++ b/lib/ovsdb-data.c
@@ -492,6 +492,45 @@ ovsdb_atom_to_json__(const union ovsdb_atom *atom, enum 
ovsdb_atomic_type type,
 }
 }
 
+/* Serializes 'atom', of the specified 'type', into 'ds' in JSON format.
+ *
+ * Refer to RFC 7047 for the format of the JSON that this function produces. */
+static void
+ovsdb_atom_to_json_ds(const union ovsdb_atom *atom,
+  enum ovsdb_atomic_type type, struct ds *ds)
+{
+switch (type) {
+case OVSDB_TYPE_VOID:
+OVS_NOT_REACHED();
+
+case OVSDB_TYPE_INTEGER:
+ds_put_format(ds, "%lld", (long long) atom->integer);
+return;
+
+case OVSDB_TYPE_REAL:
+ds_put_format(ds, "%.*g", DBL_DIG, atom->real);
+return;
+
+case OVSDB_TYPE_BOOLEAN:
+ds_put_cstr(ds, atom->boolean ? "true" : "false");
+return;
+
+case OVSDB_TYPE_STRING:
+json_to_ds(atom->s, JSSF_SORT, ds);
+return;
+
+case OVSDB_TYPE_UUID:
+ds_put_cstr(ds, "[\"uuid\",\"");
+ds_put_format(ds, UUID_FMT, UUID_ARGS(&atom->uuid));
+ds_put_cstr(ds, "\"]");
+return;
+
+case OVSDB_N_TYPES:
+default:
+OVS_NOT_REACHED();
+}
+}
+
 struct json *
 ovsdb_atom_to_json(const union ovsdb_atom *atom, enum ovsdb_atomic_type type)
 {
@@ -1445,6 +1484,23 @@ ovsdb_base_to_json(const union ovsdb_atom *atom,
 }
 }
 
+static void
+ovsdb_base_to_json_ds(const union ovsdb_atom *atom,
+  const struct ovsdb_base_type *base,
+  bool use_row_names,
+  struct ds *ds)
+{
+if (!use_row_names
+|| base->type != OVSDB_TYPE_UUID
+|| !base->uuid.refTableName) {
+ovsdb_atom_to_json_ds(atom, base->type, ds);
+} else {
+ds_put_cstr(ds, "[\"named-uuid\",\"");
+ds_put_format(ds, UUID_ROW_FMT, UUID_ARGS(&atom->uuid));
+ds_put_cstr(ds, "\"]");
+}
+}
+
 static struct json *
 ovsdb_datum_to_json__(const struct ovsdb_datum *datum,
   const struct ovsdb_type *type,
@@ -1482,6 +1538,47 @@ ovsdb_datum_to_json__(const struct ovsdb_d

[ovs-dev] [PATCH v2 2/4] ovsdb: Simplify UUID formatting code.

2024-06-25 Thread Grigorii Nazarov
Signed-off-by: Grigorii Nazarov 
---
 lib/ovsdb-data.c | 8 +---
 lib/uuid.h   | 1 +
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
index abb923ad8..defb048d7 100644
--- a/lib/ovsdb-data.c
+++ b/lib/ovsdb-data.c
@@ -2582,14 +2582,8 @@ char *
 ovsdb_data_row_name(const struct uuid *uuid)
 {
 char *name;
-char *p;
 
-name = xasprintf("row"UUID_FMT, UUID_ARGS(uuid));
-for (p = name; *p != '\0'; p++) {
-if (*p == '-') {
-*p = '_';
-}
-}
+name = xasprintf(UUID_ROW_FMT, UUID_ARGS(uuid));
 
 return name;
 }
diff --git a/lib/uuid.h b/lib/uuid.h
index fa49354f6..6a8069f68 100644
--- a/lib/uuid.h
+++ b/lib/uuid.h
@@ -34,6 +34,7 @@ extern "C" {
  */
 #define UUID_LEN 36
 #define UUID_FMT "%08x-%04x-%04x-%04x-%04x%08x"
+#define UUID_ROW_FMT "row%08x_%04x_%04x_%04x_%04x%08x"
 #define UUID_ARGS(UUID) \
 ((unsigned int) ((UUID)->parts[0])),\
 ((unsigned int) ((UUID)->parts[1] >> 16)),  \
-- 
2.45.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 3/4] lib/json: Simplify string serialization code.

2024-06-25 Thread Grigorii Nazarov
Signed-off-by: Grigorii Nazarov 
---
 lib/json.c | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/lib/json.c b/lib/json.c
index 001f6e6ab..d40e93857 100644
--- a/lib/json.c
+++ b/lib/json.c
@@ -127,7 +127,9 @@ static void json_parser_input(struct json_parser *, struct 
json_token *);
 
 static void json_error(struct json_parser *p, const char *format, ...)
 OVS_PRINTF_FORMAT(2, 3);
-
+
+static void json_serialize_string(const char *, struct ds *);
+
 const char *
 json_type_to_string(enum json_type type)
 {
@@ -987,11 +989,7 @@ exit:
 void
 json_string_escape(const char *in, struct ds *out)
 {
-struct json json = {
-.type = JSON_STRING,
-.string = CONST_CAST(char *, in),
-};
-json_to_ds(&json, 0, out);
+json_serialize_string(in, out);
 }
 
 static void
@@ -1572,7 +1570,6 @@ static void json_serialize_object(const struct shash 
*object,
   struct json_serializer *);
 static void json_serialize_array(const struct json_array *,
  struct json_serializer *);
-static void json_serialize_string(const char *, struct ds *);
 
 /* Converts 'json' to a string in JSON format, encoded in UTF-8, and returns
  * that string.  The caller is responsible for freeing the returned string,
-- 
2.45.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 1/4] .gitignore: Add clangd configuration file.

2024-06-25 Thread Grigorii Nazarov
Signed-off-by: Grigorii Nazarov 
---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 26ed8d3d0..3c7250159 100644
--- a/.gitignore
+++ b/.gitignore
@@ -79,3 +79,4 @@ testsuite.tmp.orig
 /Documentation/_build
 /.venv
 /cxx-check
+/compile_flags.txt
-- 
2.45.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 ovn 2/3] northd: Add nexhop id in ct_label.label.

2024-06-25 Thread Lorenzo Bianconi
> On Thu, Jun 6, 2024 at 8:35 PM Lorenzo Bianconi 
> wrote:
> 
> > Introduce the nexthop identifier in the ct_label.label field for
> > ecmp-symmetric replies connections. This field will be used by
> > ovn-controller to track ct entries and to flush them if requested by the
> > CMS (e.g. removing the related static routes).
> >
> > Signed-off-by: Lorenzo Bianconi 
> >
> 
> Hi Lorenzo,
> 
> thank you for the patch. I have one small nit down below.
> 
> 
> > ---
> >  northd/en-lflow.c|  3 +++
> >  northd/inc-proc-northd.c |  1 +
> >  northd/northd.c  | 41 +---
> >  northd/northd.h  |  1 +
> >  tests/ovn.at |  4 +--
> >  tests/system-ovn.at  | 58 +++-
> >  6 files changed, 72 insertions(+), 36 deletions(-)
> >
> > diff --git a/northd/en-lflow.c b/northd/en-lflow.c
> > index 3dba5034b..b4df49076 100644
> > --- a/northd/en-lflow.c
> > +++ b/northd/en-lflow.c
> > @@ -54,6 +54,8 @@ lflow_get_input_data(struct engine_node *node,
> >  engine_get_input_data("lr_stateful", node);
> >  struct ed_type_ls_stateful *ls_stateful_data =
> >  engine_get_input_data("ls_stateful", node);
> > +struct ecmp_nexthop_data *nexthop_data =
> > +engine_get_input_data("ecmp_nexthop", node);
> >
> >  lflow_input->sbrec_logical_flow_table =
> >  EN_OVSDB_GET(engine_get_input("SB_logical_flow", node));
> > @@ -83,6 +85,7 @@ lflow_get_input_data(struct engine_node *node,
> >  lflow_input->parsed_routes = &static_routes_data->parsed_routes;
> >  lflow_input->route_tables = &static_routes_data->route_tables;
> >  lflow_input->route_policies = &route_policies_data->route_policies;
> > +lflow_input->nexthops_table = &nexthop_data->nexthops;
> >
> >  struct ed_type_global_config *global_config =
> >  engine_get_input_data("global_config", node);
> > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> > index c4e5b9bf6..3d4bfa175 100644
> > --- a/northd/inc-proc-northd.c
> > +++ b/northd/inc-proc-northd.c
> > @@ -281,6 +281,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >  engine_add_input(&en_lflow, &en_route_policies, NULL);
> >  engine_add_input(&en_lflow, &en_static_routes, NULL);
> >  engine_add_input(&en_lflow, &en_bfd, NULL);
> > +engine_add_input(&en_lflow, &en_ecmp_nexthop, NULL);
> >  engine_add_input(&en_lflow, &en_northd, lflow_northd_handler);
> >  engine_add_input(&en_lflow, &en_port_group, lflow_port_group_handler);
> >  engine_add_input(&en_lflow, &en_lr_stateful,
> > lflow_lr_stateful_handler);
> > diff --git a/northd/northd.c b/northd/northd.c
> > index efe1e3f46..0e7ff0df1 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -10903,7 +10903,8 @@ add_ecmp_symmetric_reply_flows(struct lflow_table
> > *lflows,
> > struct ovn_port *out_port,
> > const struct parsed_route *route,
> > struct ds *route_match,
> > -   struct lflow_ref *lflow_ref)
> > +   struct lflow_ref *lflow_ref,
> > +   struct hmap *nexthops_table)
> >  {
> >  const struct nbrec_logical_router_static_route *st_route =
> > route->route;
> >  struct ds match = DS_EMPTY_INITIALIZER;
> > @@ -10939,15 +10940,28 @@ add_ecmp_symmetric_reply_flows(struct
> > lflow_table *lflows,
> >   * ds_put_cstr() call. The previous contents are needed.
> >   */
> >  ds_put_cstr(&match, " && !ct.rpl && (ct.new || ct.est)");
> > +struct ds nexthop_label = DS_EMPTY_INITIALIZER;
> >
> 
> nit: There isn't any need for the extra "struct ds". You can find the entry
> in the loop below and use it in ds_put_format directly.

ack, I will fix it.

Regards,
Lorenzo

> 
> +
> > +struct ecmp_nexthop_entry *e;
> > +HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash_string(st_route->nexthop,
> > 0),
> > + nexthops_table) {
> > +if (!strcmp(st_route->nexthop, e->nexthop)) {
> > +ds_put_format(&nexthop_label, "ct_label.label = %d;", e->id);
> > +break;
> > +}
> > +}
> > +
> >  ds_put_format(&actions,
> >  "ct_commit { ct_label.ecmp_reply_eth = eth.src; "
> > -" %s = %" PRId64 ";}; "
> > +" %s = %" PRId64 "; %s }; "
> >  "next;",
> > -ct_ecmp_reply_port_match, out_port->sb->tunnel_key);
> > +ct_ecmp_reply_port_match, out_port->sb->tunnel_key,
> > +ds_cstr(&nexthop_label));
> >  ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100,
> >  ds_cstr(&match), ds_cstr(&actions),
> >  &st_route->header_,
> >  lflow_ref);
> > +ds_destroy(&nexthop_label);
> >
> >  /* Bypass ECMP selection if we already have ct

Re: [ovs-dev] [PATCH v3 ovn 3/3] ofctrl: Introduce ecmp_nexthop_monitor.

2024-06-25 Thread Lorenzo Bianconi
On Jun 24, Ales Musil wrote:
> On Thu, Jun 6, 2024 at 8:35 PM Lorenzo Bianconi 
> wrote:
> 
> > Introduce ecmp_nexthop_monitor in ovn-controller in order to track and
> > flush ecmp-symmetric reply ct entires when requested by the CMS (e.g
> > removing the related static routes).
> >
> > Signed-off-by: Lorenzo Bianconi 
> > ---
> >
> 
> Hi Lorenzo,
> 
> thank you for the patch. I have some comments below.

Hi Ales,

thx for the review.

> 
> 
> >  controller/ofctrl.c | 101 ++
> >  controller/ofctrl.h |   2 +
> >  controller/ovn-controller.c |   2 +
> >  tests/system-ovn-kmod.at| 266 
> >  tests/system-ovn.at |   4 +
> >  5 files changed, 375 insertions(+)
> >
> > diff --git a/controller/ofctrl.c b/controller/ofctrl.c
> > index 9d181a782..826f78a85 100644
> > --- a/controller/ofctrl.c
> > +++ b/controller/ofctrl.c
> > @@ -388,9 +388,24 @@ struct meter_band_entry {
> >
> >  static struct shash meter_bands;
> >
> > +static struct hmap ecmp_nexthop_map;
> > +struct ecmp_nexthop_entry {
> > +struct hmap_node node;
> > +bool erase;
> > +
> > +char *nexthop;
> > +int id;
> > +};
> > +
> >
> 
> Do we actually need this struct? It should be enough to keep the up-to-date
> ids inside of bitmap.

ack, I will fix it.

> 
> 
> >  static void ofctrl_meter_bands_destroy(void);
> >  static void ofctrl_meter_bands_clear(void);
> >
> > +static void ecmp_nexthop_monitor_destroy(void);
> > +static void ecmp_nexthop_monitor_run(
> > +const struct sbrec_ecmp_nexthop_table *enh_table,
> > +struct ovs_list *msgs);
> > +
> > +
> >  /* MFF_* field ID for our Geneve option.  In S_TLV_TABLE_MOD_SENT, this is
> >   * the option we requested (we don't know whether we obtained it yet).  In
> >   * S_CLEAR_FLOWS or S_UPDATE_FLOWS, this is really the option we have. */
> > @@ -429,6 +444,7 @@ ofctrl_init(struct ovn_extend_table *group_table,
> >  groups = group_table;
> >  meters = meter_table;
> >  shash_init(&meter_bands);
> > +hmap_init(&ecmp_nexthop_map);
> >  }
> >
[...]
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 6874f99a3..d72dc8fef 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -6076,6 +6076,8 @@ main(int argc, char *argv[])
> > &ct_zones_data->pending,
> > &lb_data->removed_tuples,
> > sbrec_meter_by_name,
> > +   sbrec_ecmp_nexthop_table_get(
> > +ovnsb_idl_loop.idl),
> > ofctrl_seqno_get_req_cfg(),
> > engine_node_changed(&en_lflow_output),
> > engine_node_changed(&en_pflow_output));
> > diff --git a/tests/system-ovn-kmod.at b/tests/system-ovn-kmod.at
> > index 63ecc7ff4..142c4ea6f 100644
> > --- a/tests/system-ovn-kmod.at
> > +++ b/tests/system-ovn-kmod.at
> > @@ -1055,3 +1055,269 @@ OVS_TRAFFIC_VSWITCHD_STOP(["
> >  "])
> >  AT_CLEANUP
> >  ])
> > +
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([ECMP symmetric reply - kmod])
> >
> 
>  Why doesn't it work with userspace datapath?

it was because we spotted this OVS issue:

https://patchwork.ozlabs.org/project/openvswitch/patch/20240312100255.498965-1-pvale...@redhat.com/

It is fixed now, so we can have both of them.

Regards,
Lorenzo

> 
> 
> > +AT_KEYWORDS([ecmp])
> > +
> > +CHECK_CONNTRACK()
> > +ovn_start
> > +
> > +OVS_TRAFFIC_VSWITCHD_START()
> > +ADD_BR([br-int])
> > +
> > +# Set external-ids in br-int needed for ovn-controller
> > +ovs-vsctl \
> > +-- set Open_vSwitch . external-ids:system-id=hv1 \
> > +-- set Open_vSwitch .
> > external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> > +-- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> > +-- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> > +-- set bridge br-int fail-mode=secure
> > other-config:disable-in-band=true
> > +
> > +# Start ovn-controller
> > +start_daemon ovn-controller
> > +
> > +# Logical network:
> > +# Alice is connected to gateway router R1. R1 is connected to two
> > "external"
> > +# routers, R2 and R3 via an "ext" switch.
> > +# Bob is connected to both R2 and R3. R1 contains two ECMP routes, one
> > through R2
> > +# and one through R3, to Bob.
> > +#
> > +# alice -- R1 -- ext  R2
> > +# | \
> > +# |   bob
> > +# | /
> > +# + - R3
> > +#
> > +# For this test, Bob sends request traffic through R2 to Alice. We want
> > to ensure that
> > +# all response traffic from Alice is routed through R2 as well.
> > +
> > +ovn-nbctl create Logical_Router name=R1 options:chassis=hv1
> > +ovn-nbctl create Logical_Router name=R2
> > +ovn-nbctl create L