Re: [ovs-dev] [PATCH] netdev-offload-tc: Flush rules on all chains before attach ingress block

2021-02-22 Thread Roi Dayan

Hi,

Any comments or thoughts on this patch?

Thanks,
Roi


On 2021-01-26 9:17 AM, Roi Dayan wrote:

From: Jianbo Liu 

Previously, only chain 0 is deleted before attach the ingress block.
If there are rules on the chain other than 0, these rules are not flushed.
In this case, the recreation of qdisc also fails. To fix this issue, flush
rules from all chains.

Signed-off-by: Jianbo Liu 
Reviewed-by: Roi Dayan 
---
  lib/netdev-offload-tc.c | 85 ++---
  lib/tc.c| 39 +++
  lib/tc.h|  2 ++
  3 files changed, 121 insertions(+), 5 deletions(-)

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 717a987d14d8..bd7aa7725ad3 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -55,6 +55,11 @@ struct netlink_field {
  int size;
  };
  
+struct chain_node {

+struct hmap_node node;
+uint32_t chain;
+};
+
  static bool
  is_internal_port(const char *type)
  {
@@ -345,6 +350,69 @@ get_block_id_from_netdev(struct netdev *netdev)
  }
  
  static int

+get_chains_from_netdev(struct netdev *netdev, struct tcf_id *id,
+   struct hmap *map)
+{
+struct netdev_flow_dump *dump;
+struct chain_node *chain_node;
+struct ofpbuf rbuffer, reply;
+uint32_t chain;
+size_t hash;
+int err;
+
+dump = xzalloc(sizeof *dump);
+dump->nl_dump = xzalloc(sizeof *dump->nl_dump);
+dump->netdev = netdev_ref(netdev);
+
+ofpbuf_init(&rbuffer, NL_DUMP_BUFSIZE);
+tc_dump_tc_chain_start(id, dump->nl_dump);
+
+while (nl_dump_next(dump->nl_dump, &reply, &rbuffer)) {
+if (parse_netlink_to_tc_chain(&reply, &chain)) {
+continue;
+}
+
+chain_node = xzalloc(sizeof *chain_node);
+chain_node->chain = chain;
+hash = hash_int(chain, 0);
+hmap_insert(map, &chain_node->node, hash);
+}
+
+err = nl_dump_done(dump->nl_dump);
+ofpbuf_uninit(&rbuffer);
+netdev_close(netdev);
+free(dump->nl_dump);
+free(dump);
+
+return err;
+}
+
+static int
+delete_chains_from_netdev(struct netdev *netdev, struct tcf_id *id)
+{
+struct chain_node *chain_node;
+struct hmap map;
+int error;
+
+hmap_init(&map);
+error = get_chains_from_netdev(netdev, id, &map);
+
+if (!error) {
+/* Flush rules explicitly needed when we work with ingress_block,
+ * so we will not fail with reattaching block to bond iface, for ex.
+ */
+HMAP_FOR_EACH_POP (chain_node, node, &map) {
+id->chain = chain_node->chain;
+tc_del_filter(id);
+free(chain_node);
+}
+}
+
+hmap_destroy(&map);
+return error;
+}
+
+static int
  netdev_tc_flow_flush(struct netdev *netdev)
  {
  struct ufid_tc_data *data, *next;
@@ -2008,6 +2076,7 @@ netdev_tc_init_flow_api(struct netdev *netdev)
  {
  static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
  enum tc_qdisc_hook hook = get_tc_qdisc_hook(netdev);
+static bool get_chain_supported = true;
  uint32_t block_id = 0;
  struct tcf_id id;
  int ifindex;
@@ -2021,12 +2090,18 @@ netdev_tc_init_flow_api(struct netdev *netdev)
  }
  
  block_id = get_block_id_from_netdev(netdev);

-
-/* Flush rules explicitly needed when we work with ingress_block,
- * so we will not fail with reattaching block to bond iface, for ex.
- */
  id = tc_make_tcf_id(ifindex, block_id, 0, hook);
-tc_del_filter(&id);
+
+if (get_chain_supported) {
+if (delete_chains_from_netdev(netdev, &id)) {
+get_chain_supported = false;
+}
+}
+
+/* fallback here if delete chains fail */
+if (!get_chain_supported) {
+tc_del_filter(&id);
+}
  
  /* make sure there is no ingress/egress qdisc */

  tc_add_del_qdisc(ifindex, false, 0, hook);
diff --git a/lib/tc.c b/lib/tc.c
index c2de78bfe347..74a8113eaa0f 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -61,6 +61,10 @@
  #define TCA_DUMP_FLAGS 15
  #endif
  
+#ifndef RTM_GETCHAIN

+#define RTM_GETCHAIN 102
+#endif
+
  VLOG_DEFINE_THIS_MODULE(tc);
  
  static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);

@@ -297,6 +301,10 @@ static const struct nl_policy tca_policy[] = {
  [TCA_STATS2] = { .type = NL_A_NESTED, .optional = true, },
  };
  
+static const struct nl_policy tca_chain_policy[] = {

+[TCA_CHAIN] = { .type = NL_A_U32, .optional = false, },
+};
+
  static const struct nl_policy tca_flower_policy[] = {
  [TCA_FLOWER_CLASSID] = { .type = NL_A_U32, .optional = true, },
  [TCA_FLOWER_INDEV] = { .type = NL_A_STRING, .max_len = IFNAMSIZ,
@@ -1864,6 +1872,25 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct 
tcf_id *id,
  }
  
  int

+parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain)
+{
+struct nlattr *ta[ARRAY_SIZE(tca_chain_policy)];
+struct tcmsg *tc;
+
+tc = ofpbuf_at_assert(reply, NLMSG_HD

Re: [ovs-dev] Does the OVS command line support on-root users?

2021-02-22 Thread Ben Pfaff
On Tue, Feb 23, 2021 at 02:52:22AM +, wangyunjian wrote:
> non-root users can use ovs-vsctl, but the following security problems may 
> exist. Because /usr/share/openvswitch/scripts/ovs-ctl will call ovs-appctl, 
> ovs-vsctl etc., and ovs-ctl is called by the OVS service as root. In this 
> case, the following paths of attack exist:
> 1. non-root user tamper with the contents of ovs-vsctl to execute arbitrary 
> bash commands, such as `reboot`;
> 2. When the ovs-ctl script is called by the OVS service as root, the 
> ovs-vsctl command will be executed, and then the reboot is triggered. 
> Originally, non-root users are not entitled to execute reboot, but through 
> this attack can be successfully executed, there is a risk of raising 
> privilege.

I'd generally consider a scenario like the above to be a bug in
ovs-ctl.  Do you have a specific vulnerability to report?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Does the OVS command line support on-root users?

2021-02-22 Thread wangyunjian
non-root users can use ovs-vsctl, but the following security problems may 
exist. Because /usr/share/openvswitch/scripts/ovs-ctl will call ovs-appctl, 
ovs-vsctl etc., and ovs-ctl is called by the OVS service as root. In this case, 
the following paths of attack exist:
1. non-root user tamper with the contents of ovs-vsctl to execute arbitrary 
bash commands, such as `reboot`;
2. When the ovs-ctl script is called by the OVS service as root, the ovs-vsctl 
command will be executed, and then the reboot is triggered. Originally, 
non-root users are not entitled to execute reboot, but through this attack can 
be successfully executed, there is a risk of raising privilege.

Thanks

From: Vasu Dasari [mailto:vdas...@gmail.com]
Sent: Monday, February 22, 2021 11:13 PM
To: wangyunjian 
Cc: ovs-discuss-boun...@openvswitch.org; d...@openvswitch.org; dingxiaoxiong 
; Wangqian (Euler) ; 
chenchanghu 
Subject: Re: [ovs-dev] Does the OVS command line support on-root users?

You should be able to use ovs-vsctl commands as non-root users on a vswitch 
started as a non-root user. For example you can look at how any of the unit 
test cases are executed in ovs/tests/ofproto.at.

For example, you can run one of the test cases as,
$ cd $ovs_src_dir
$ make check TESTSUITEFLAGS='-k "ofproto - echo request"'



Vasu Dasari


On Mon, Feb 22, 2021 at 3:49 AM wangyunjian 
mailto:wangyunj...@huawei.com>> wrote:
Hi all:
  I have a question to consult: I see that OVS daemon has
been supported to run as non-root in 2015 with the patch
"lib/daemon: support --user option for all OVS daemon",
I would like to know whether the openvswitch command line
such as ovs-vsctl/ovs-appctl will be called as non-root
in any plan, and is there any consideration for calling
as root at present?

Thanks
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Fail to compile the git master version of openvswitch on Ubuntu 20.04.

2021-02-22 Thread Hongyi Zhao
On Tue, Feb 23, 2021 at 3:35 AM Ben Pfaff  wrote:
>
> On Mon, Feb 22, 2021 at 12:00:24PM +0800, Hongyi Zhao wrote:
> > I try to compile the git master version of openvswitch on Ubuntu 20.04
> > with the following steps:
> >
> > $ sudo apt-get build-dep openvswitch-switch
> > $ git clone https://github.com/openvswitch/ovs.git openvswitch/ovs.git
> > $ cd openvswitch/ovs.git/
> > $ ./boot.sh
> > $ ./configure
> > $ make -j1
> > [...]
> > make  all-recursive
> > make[1]: Entering directory
> > '/home/werner/Public/repo/github.com/openvswitch/ovs.git'
> > Making all in datapath
> > make[2]: Entering directory
> > '/home/werner/Public/repo/github.com/openvswitch/ovs.git/datapath'
> > make[3]: Entering directory
> > '/home/werner/Public/repo/github.com/openvswitch/ovs.git/datapath'
> > make[3]: Leaving directory
> > '/home/werner/Public/repo/github.com/openvswitch/ovs.git/datapath'
> > make[2]: Leaving directory
> > '/home/werner/Public/repo/github.com/openvswitch/ovs.git/datapath'
> > make[2]: Entering directory
> > '/home/werner/Public/repo/github.com/openvswitch/ovs.git'
> > make[3]: Entering directory
> > '/home/werner/Public/repo/github.com/openvswitch/ovs.git/datapath'
> > make[3]: 'distfiles' is up to date.
> > make[3]: Leaving directory
> > '/home/werner/Public/repo/github.com/openvswitch/ovs.git/datapath'
> > See above for list of violations of the rule that
> > every C source file must #include .
> > make[2]: *** [Makefile:6475: config-h-check] Error 1
> > make[2]: Leaving directory
> > '/home/werner/Public/repo/github.com/openvswitch/ovs.git'
> > make[1]: *** [Makefile:5252: all-recursive] Error 1
> > make[1]: Leaving directory
> > '/home/werner/Public/repo/github.com/openvswitch/ovs.git'
> > make: *** [Makefile:2997: all] Error 2
>
> I can't reproduce this and no one else has reported it.  If you can't
> otherwise figure out the problem, you can delete the lines for it from
> Makefile.am.

Thanks for your reply. But I still can't figure out which lines I
should delete from Makefile.am.

Regards
-- 
Assoc. Prof. Hongyi Zhao 
Theory and Simulation of Materials
Hebei Polytechnic University of Science and Technology engineering
NO. 552 North Gangtie Road, Xingtai, China
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v1] Static Routes: Add ability to specify "discard" nexthop

2021-02-22 Thread 0-day Robot
References:  <20210222194015.95508-1-svc.eng.git-m...@nutanix.com>
 

Bleep bloop.  Greetings , I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


git-am:
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch' to see the failed patch
Patch failed at 0001 Static Routes: Add ability to specify "discard" nexthop
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v1] Static Routes: Add ability to specify "discard" nexthop

2021-02-22 Thread svc . eng . git-mail
From: karthik-kc 

Physical switches have the ability to specify "discard" or sometimes
"NULL interface" as a nexthop for routes. This can be used to prevent
routing loops in the network. Add a similar configuration for ovn
where nexthop accepts the string "discard". When the nexthop is discard
the action in the routing table will be to drop the packets.

Signed-off-by: Karthik Chandrashekar 
---
 northd/ovn-northd.c   | 126 +++---
 ovn-nb.xml|   4 +-
 tests/ovn-nbctl.at|  13 +
 tests/ovn.at  |  93 +++
 utilities/ovn-nbctl.c |  50 -
 5 files changed, 215 insertions(+), 71 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 39d798782..18d0e0b43 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -7749,6 +7749,7 @@ struct parsed_route {
 uint32_t hash;
 const struct nbrec_logical_router_static_route *route;
 bool ecmp_symmetric_reply;
+bool is_discard_route;
 };
 
 static uint32_t
@@ -7768,20 +7769,23 @@ parsed_routes_add(struct ovs_list *routes,
 /* Verify that the next hop is an IP address with an all-ones mask. */
 struct in6_addr nexthop;
 unsigned int plen;
-if (!ip46_parse_cidr(route->nexthop, &nexthop, &plen)) {
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
-VLOG_WARN_RL(&rl, "bad 'nexthop' %s in static route"
- UUID_FMT, route->nexthop,
- UUID_ARGS(&route->header_.uuid));
-return NULL;
-}
-if ((IN6_IS_ADDR_V4MAPPED(&nexthop) && plen != 32) ||
-(!IN6_IS_ADDR_V4MAPPED(&nexthop) && plen != 128)) {
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
-VLOG_WARN_RL(&rl, "bad next hop mask %s in static route"
- UUID_FMT, route->nexthop,
- UUID_ARGS(&route->header_.uuid));
-return NULL;
+bool is_discard_route = !strcmp(route->nexthop, "discard");
+if (!is_discard_route) {
+if (!ip46_parse_cidr(route->nexthop, &nexthop, &plen)) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+VLOG_WARN_RL(&rl, "bad 'nexthop' %s in static route"
+ UUID_FMT, route->nexthop,
+ UUID_ARGS(&route->header_.uuid));
+return NULL;
+}
+if ((IN6_IS_ADDR_V4MAPPED(&nexthop) && plen != 32) ||
+(!IN6_IS_ADDR_V4MAPPED(&nexthop) && plen != 128)) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+VLOG_WARN_RL(&rl, "bad next hop mask %s in static route"
+ UUID_FMT, route->nexthop,
+ UUID_ARGS(&route->header_.uuid));
+return NULL;
+}
 }
 
 /* Parse ip_prefix */
@@ -7795,13 +7799,15 @@ parsed_routes_add(struct ovs_list *routes,
 }
 
 /* Verify that ip_prefix and nexthop have same address familiy. */
-if (IN6_IS_ADDR_V4MAPPED(&prefix) != IN6_IS_ADDR_V4MAPPED(&nexthop)) {
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
-VLOG_WARN_RL(&rl, "Address family doesn't match between 'ip_prefix' %s"
- " and 'nexthop' %s in static route"UUID_FMT,
- route->ip_prefix, route->nexthop,
- UUID_ARGS(&route->header_.uuid));
-return NULL;
+if (!is_discard_route) {
+if (IN6_IS_ADDR_V4MAPPED(&prefix) != IN6_IS_ADDR_V4MAPPED(&nexthop)) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+VLOG_WARN_RL(&rl, "Address family doesn't match between 
'ip_prefix' %s"
+ " and 'nexthop' %s in static route"UUID_FMT,
+ route->ip_prefix, route->nexthop,
+ UUID_ARGS(&route->header_.uuid));
+return NULL;
+}
 }
 
 const struct nbrec_bfd *nb_bt = route->bfd;
@@ -7832,6 +7838,7 @@ parsed_routes_add(struct ovs_list *routes,
 pr->route = route;
 pr->ecmp_symmetric_reply = smap_get_bool(&route->options,
  "ecmp_symmetric_reply", false);
+pr->is_discard_route = is_discard_route;
 ovs_list_insert(routes, &pr->list_node);
 return pr;
 }
@@ -8244,10 +8251,11 @@ build_ecmp_route_flow(struct hmap *lflows, struct 
ovn_datapath *od,
 }
 
 static void
-add_route(struct hmap *lflows, const struct ovn_port *op,
-  const char *lrp_addr_s, const char *network_s, int plen,
-  const char *gateway, bool is_src_route,
-  const struct ovsdb_idl_row *stage_hint)
+add_route(struct hmap *lflows, struct ovn_datapath *od,
+  const struct ovn_port *op, const char *lrp_addr_s,
+  const char *network_s, int plen, const char *gateway,
+  bool is_src_route, const struct ovsdb_idl_row *stage_hint,
+  bool is_discard_route)
 {
 bool is_ipv4 = s

Re: [ovs-dev] Fail to compile the git master version of openvswitch on Ubuntu 20.04.

2021-02-22 Thread Ben Pfaff
On Mon, Feb 22, 2021 at 12:00:24PM +0800, Hongyi Zhao wrote:
> I try to compile the git master version of openvswitch on Ubuntu 20.04
> with the following steps:
> 
> $ sudo apt-get build-dep openvswitch-switch
> $ git clone https://github.com/openvswitch/ovs.git openvswitch/ovs.git
> $ cd openvswitch/ovs.git/
> $ ./boot.sh
> $ ./configure
> $ make -j1
> [...]
> make  all-recursive
> make[1]: Entering directory
> '/home/werner/Public/repo/github.com/openvswitch/ovs.git'
> Making all in datapath
> make[2]: Entering directory
> '/home/werner/Public/repo/github.com/openvswitch/ovs.git/datapath'
> make[3]: Entering directory
> '/home/werner/Public/repo/github.com/openvswitch/ovs.git/datapath'
> make[3]: Leaving directory
> '/home/werner/Public/repo/github.com/openvswitch/ovs.git/datapath'
> make[2]: Leaving directory
> '/home/werner/Public/repo/github.com/openvswitch/ovs.git/datapath'
> make[2]: Entering directory
> '/home/werner/Public/repo/github.com/openvswitch/ovs.git'
> make[3]: Entering directory
> '/home/werner/Public/repo/github.com/openvswitch/ovs.git/datapath'
> make[3]: 'distfiles' is up to date.
> make[3]: Leaving directory
> '/home/werner/Public/repo/github.com/openvswitch/ovs.git/datapath'
> See above for list of violations of the rule that
> every C source file must #include .
> make[2]: *** [Makefile:6475: config-h-check] Error 1
> make[2]: Leaving directory
> '/home/werner/Public/repo/github.com/openvswitch/ovs.git'
> make[1]: *** [Makefile:5252: all-recursive] Error 1
> make[1]: Leaving directory
> '/home/werner/Public/repo/github.com/openvswitch/ovs.git'
> make: *** [Makefile:2997: all] Error 2

I can't reproduce this and no one else has reported it.  If you can't
otherwise figure out the problem, you can delete the lines for it from
Makefile.am.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v2] ofctrl: Fix the assert seen when flood removing flows with conj actions.

2021-02-22 Thread numans
From: Numan Siddique 

In one of the scaled deployments, ovn-controller is asserting with the
below stack trace

***
 (gdb) bt
   0  raise () from /lib64/libc.so.6
   1  abort () from /lib64/libc.so.6
   2  ovs_abort_valist ("%s: assertion %s failed in %s()") at lib/util.c:419
   3  vlog_abort_valist ("%s: assertion %s failed in %s()") at 
lib/vlog.c:1249
   4  vlog_abort ("%s: assertion %s failed in %s()") at lib/vlog.c:1263
   5  ovs_assert_failure (where="controller/ofctrl.c:1216",
  function="flood_remove_flows_for_sb_uuid",
  condition="ovs_list_is_empty(&f->list_node)") at 
lib/util.c:86
   6  flood_remove_flows_for_sb_uuid (sb_uuid=...134,
flood_remove_nodes=...ef0) at controller/ofctrl.c:1216
   9  ofctrl_flood_remove_flows (flood_remove_nodes=...ef0) at 
controller/ofctrl.c:1280
   10 lflow_handle_changed_ref (ref_type=REF_TYPE_PORTGROUP,
ref_name= "5564_pg_14...aac") at controller/lflow.c:522
   11 _flow_output_resource_ref_handler (ref_type=REF_TYPE_PORTGROUP)
at controller/ovn-controller.c:2220
   12 engine_compute () at lib/inc-proc-eng.c:306
   13 engine_run_node (recompute_allowed=true) at lib/inc-proc-eng.c:352
   14 engine_run (recompute_allowed=true) at lib/inc-proc-eng.c:377
   15 main () at controller/ovn-controller.c:2887
***

The reason for this assert is different from the assertion bug fixed in
the commit 858d1dd716("ofctrl: Fix the assert seen when flood removing
flows.")

The above assertion is seen if lflow.c calls ofctrl_add_or_append_flow()
twice for the same flow uuid 'S'.  When this function is called for
the first time, a new desired flow 'f' is created and 'f->references'
will have [(S, f)].  When it is called for the 2nd time, the list
'f->references' is updated as [(S, f), (S,f)].  Later when
flood_remove_flows_for_sb_uuid() is called for 'S', the above assertion
is seen.

This scenarion can happen when ovn-controller receives an update message
from SB ovsdb-server in which a new logical flow 'F' belonging to a
datapath 'D' is added which would result in conjunction flows.  If this
datapath 'D' is added to 'local_datapaths' in the same loop, then
logical flow 'F' is first processed in lflow_add_flows_for_datapath()
and the second time in lflow_handle_changed_flows().

This issue can be fixed in 2 ways
1. Flood remove all the tracked flows in lflow_handle_changed_flows()
   first and then process the modified or newly added logical flows.

2. In the function ofctrl_add_or_append_flow(), check if there is
   already a reference for the flow uuid 'S' in the existing desired
   flows and only append the actions if it doesn't exist.

This patch has taken the approach (1) to ensure correctness of flows.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1929978
Signed-off-by: Numan Siddique 
---
 controller/lflow.c |  34 ---
 tests/ovn.at   | 104 +
 2 files changed, 112 insertions(+), 26 deletions(-)

diff --git a/controller/lflow.c b/controller/lflow.c
index ef48cf5b4..8468cb4b4 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -389,22 +389,19 @@ lflow_handle_changed_flows(struct lflow_ctx_in *l_ctx_in,
 struct controller_event_options controller_event_opts;
 controller_event_opts_init(&controller_event_opts);
 
-/* Handle flow removing first (for deleted or updated lflows), and then
- * handle reprocessing or adding flows, so that when the flows being
- * removed and added with same match conditions can be processed in the
- * proper order */
-
+/* Flood remove the flows for all the tracked lflows.  Its possible that
+ * lflow_add_flows_for_datapath() may have been called before calling
+ * this function. */
 struct hmap flood_remove_nodes = HMAP_INITIALIZER(&flood_remove_nodes);
 struct ofctrl_flood_remove_node *ofrn, *next;
 SBREC_LOGICAL_FLOW_TABLE_FOR_EACH_TRACKED (lflow,
l_ctx_in->logical_flow_table) {
+VLOG_DBG("delete lflow "UUID_FMT, UUID_ARGS(&lflow->header_.uuid));
+ofrn = xmalloc(sizeof *ofrn);
+ofrn->sb_uuid = lflow->header_.uuid;
+hmap_insert(&flood_remove_nodes, &ofrn->hmap_node,
+uuid_hash(&ofrn->sb_uuid));
 if (!sbrec_logical_flow_is_new(lflow)) {
-VLOG_DBG("delete lflow "UUID_FMT,
- UUID_ARGS(&lflow->header_.uuid));
-ofrn = xmalloc(sizeof *ofrn);
-ofrn->sb_uuid = lflow->header_.uuid;
-hmap_insert(&flood_remove_nodes, &ofrn->hmap_node,
-uuid_hash(&ofrn->sb_uuid));
 if (lflow_cache_is_enabled(l_ctx_out->lflow_cache)) {
 lflow_cache_delete(l_ctx_out->lflow_cache,
&lflow->header_.uuid);
@@ -435,21 +432,6 @@ lflow_handle_chan

Re: [ovs-dev] Does the OVS command line support on-root users?

2021-02-22 Thread Vasu Dasari
You should be able to use ovs-vsctl commands as non-root users on a vswitch
started as a non-root user. For example you can look at how any of the unit
test cases are executed in ovs/tests/ofproto.at.

For example, you can run one of the test cases as,
$ cd $ovs_src_dir
$ make check TESTSUITEFLAGS='-k "ofproto - echo request"'


*Vasu Dasari*


On Mon, Feb 22, 2021 at 3:49 AM wangyunjian  wrote:

> Hi all:
>   I have a question to consult: I see that OVS daemon has
> been supported to run as non-root in 2015 with the patch
> "lib/daemon: support --user option for all OVS daemon",
> I would like to know whether the openvswitch command line
> such as ovs-vsctl/ovs-appctl will be called as non-root
> in any plan, and is there any consideration for calling
> as root at present?
>
> Thanks
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4 0/5] XDP offload using flow API provider

2021-02-22 Thread Toshiaki Makita

On 2021/02/16 10:49, William Tu wrote:

On Tue, Feb 9, 2021 at 1:39 AM Toshiaki Makita
 wrote:


On 2021/02/05 2:36, William Tu wrote:

Hi Toshiaki,

Thanks for the patch. I've been testing it for a couple days.
I liked it a lot! The compile and build process all work without any issues.


Hi, thank you for reviewing!
Sorry for taking time to reply. It took time to remember every detail of the 
patch set...


On Thu, Jul 30, 2020 at 7:55 PM Toshiaki Makita
 wrote:


This patch adds an XDP-based flow cache using the OVS netdev-offload
flow API provider.  When an OVS device with XDP offload enabled,
packets first are processed in the XDP flow cache (with parse, and
table lookup implemented in eBPF) and if hits, the action processing
are also done in the context of XDP, which has the minimum overhead.

This provider is based on top of William's recently posted patch for
custom XDP load.  When a custom XDP is loaded, the provider detects if
the program supports classifier, and if supported it starts offloading
flows to the XDP program.

The patches are derived from xdp_flow[1], which is a mechanism similar to
this but implemented in kernel.


* Motivation

While userspace datapath using netdev-afxdp or netdev-dpdk shows good
performance, there are use cases where packets better to be processed in
kernel, for example, TCP/IP connections, or container to container
connections.  Current solution is to use tap device or af_packet with
extra kernel-to/from-userspace overhead.  But with XDP, a better solution
is to steer packets earlier in the XDP program, and decides to send to
userspace datapath or stay in kernel.

One problem with current netdev-afxdp is that it forwards all packets to
userspace, The first patch from William (netdev-afxdp: Enable loading XDP
program.) only provides the interface to load XDP program, howerver users
usually don't know how to write their own XDP program.

XDP also supports HW-offload so it may be possible to offload flows to
HW through this provider in the future, although not currently.
The reason is that map-in-map is required for our program to support
classifier with subtables in XDP, but map-in-map is not offloadable.
If map-in-map becomes offloadable, HW-offload of our program may also
be possible.


I think it's too far away for XDP to be offloaded into HW and meet OVS's
feature requirements.


I don't know blockers other than map-in-map, but probably there are more.
If you can provide explicit blockers I can add them in the cover letter.


It's hard to list them when we don't have a full OVS datapath
implemented in XDP.
Here are a couple things I can imagine. How does HW offloaded support:
- AF_XDP socket. The XSK map contains XSK fd, how does it exchange
   the fd to host kernel?
- how does offloaded XDP redirect to another netdev
- Helper functions such as adjust_head for pushing the outer header.


Thanks, this is helpful. will add them.


There is a research prototype here, FYI.
https://www.usenix.org/conference/osdi20/presentation/brunella


This is a presentation about FPGA, not HW offload to SmartNIC, right?


Yes, that's for offloading to FPGA.




* How to use

1. Install clang/llvm >= 9, libbpf >= 0.0.6 (included in kernel 5.5), and
 kernel >= 5.3.

2. make with --enable-afxdp --enable-xdp-offload
--enable-bpf will generate XDP program "bpf/flowtable_afxdp.o".  Note that


typo: I think you mean --enable-xdp-offload


Thanks.




the BPF object will not be installed anywhere by "make install" at this point.

3. Load custom XDP program
E.g.
$ ovs-vsctl add-port ovsbr0 veth0 -- set int veth0 options:xdp-mode=native \
options:xdp-obj="/path/to/ovs/bpf/flowtable_afxdp.o"
$ ovs-vsctl add-port ovsbr0 veth1 -- set int veth1 options:xdp-mode=native \
options:xdp-obj="/path/to/ovs/bpf/flowtable_afxdp.o"

4. Enable XDP_REDIRECT
If you use veth devices, make sure to load some (possibly dummy) programs
on the peers of veth devices. This patch set includes a program which
does nothing but returns XDP_PASS. You can use it for the veth peer like
this:
$ ip link set veth1 xdpdrv object /path/to/ovs/bpf/xdp_noop.o section xdp


I'd suggest not using "veth1" as an example, because in (3) above, people
might think "veth1" is already attached to ovsbr0.
IIUC, here your "veth1" should be the device at the peer inside
another namespace.


Sure, will rename it.



Some HW NIC drivers require as many queues as cores on its system. Tweak
queues using "ethtool -L".

5. Enable hw-offload
$ ovs-vsctl set Open_vSwitch . other_config:offload-driver=linux_xdp
$ ovs-vsctl set Open_vSwitch . other_config:hw-offload=true
This will starts offloading flows to the XDP program.

You should be able to see some maps installed, including "debug_stats".
$ bpftool map

If packets are successfully redirected by the XDP program,
debug_stats[2] will be counted.
$ bpftool map dump id 

Currently only very limited keys and output actions are supported.
For example NORMAL action entry and IP based ma

Re: [ovs-dev] [PATCH ovn] ofctrl: Don't append actions for conj flows if called twice for same flow uuid.

2021-02-22 Thread Numan Siddique
On Mon, Feb 22, 2021 at 6:27 PM Dumitru Ceara  wrote:
>
> On 2/22/21 11:58 AM, Numan Siddique wrote:
> > On Mon, Feb 22, 2021 at 3:13 PM Dumitru Ceara  wrote:
> >>
> >> On 2/22/21 10:23 AM, Numan Siddique wrote:
> >>> On Mon, Feb 22, 2021 at 2:17 PM Dumitru Ceara  wrote:
> 
>  On 2/21/21 12:34 PM, num...@ovn.org wrote:
> > From: Numan Siddique 
> >
> > In one of the scaled deployments, ovn-controller is asserting with the
> > below stack trace
> >
> >***
> > (gdb) bt
> >   0  raise () from /lib64/libc.so.6
> >   1  abort () from /lib64/libc.so.6
> >   2  ovs_abort_valist ("%s: assertion %s failed in %s()") at 
> > lib/util.c:419
> >   3  vlog_abort_valist ("%s: assertion %s failed in %s()") at 
> > lib/vlog.c:1249
> >   4  vlog_abort ("%s: assertion %s failed in %s()") at 
> > lib/vlog.c:1263
> >   5  ovs_assert_failure (where="controller/ofctrl.c:1216",
> >  
> > function="flood_remove_flows_for_sb_uuid",
> >  
> > condition="ovs_list_is_empty(&f->list_node)") at lib/util.c:86
> >   6  flood_remove_flows_for_sb_uuid (sb_uuid=...134,
> >flood_remove_nodes=...ef0) at controller/ofctrl.c:1216
> >   9  ofctrl_flood_remove_flows (flood_remove_nodes=...ef0) at 
> > controller/ofctrl.c:1280
> >   10 lflow_handle_changed_ref (ref_type=REF_TYPE_PORTGROUP,
> >ref_name= "5564_pg_14...aac") at controller/lflow.c:522
> >   11 _flow_output_resource_ref_handler 
> > (ref_type=REF_TYPE_PORTGROUP)
> >at controller/ovn-controller.c:2220
> >   12 engine_compute () at lib/inc-proc-eng.c:306
> >   13 engine_run_node (recompute_allowed=true) at 
> > lib/inc-proc-eng.c:352
> >   14 engine_run (recompute_allowed=true) at 
> > lib/inc-proc-eng.c:377
> >   15 main () at controller/ovn-controller.c:2887
> >***
> >
> > The reason for this assert is different from the assertion bug fixed in
> > the commit 858d1dd716("ofctrl: Fix the assert seen when flood removing
> > flows.")
> >
> > The above assertion is seen if lflow.c calls ofctrl_add_or_append_flow()
> > twice for the same flow uuid 'S'.  When this function is called for
> > the first time, a new desired flow 'f' is created and 'f->references'
> > will have [(S, f)].  When it is called for the 2nd time, the list
> > 'f->references' is updated as [(S, f), (S,f)].  Later when
> > flood_remove_flows_for_sb_uuid() is called for 'S', the above assertion
> > is seen.
> >
> > This scenarion can happen when ovn-controller receives an update message
> > from SB ovsdb-server in which a new logical flow 'F' belonging to a
> > datapath 'D' is added which would result in conjunction flows.  If this
> > datapath 'D' is added to 'local_datapaths' in the same loop, then
> > logical flow 'F' is first processed in lflow_add_flows_for_datapath()
> > and the second time in lflow_handle_changed_flows().
> >
> > This issue can be fixed in 2 ways
> > 1. Flood remove all the tracked flows in lflow_handle_changed_flows()
> >   first and then process the modified or newly added logical flows.
> >
> > 2. In the function ofctrl_add_or_append_flow(), check if there is
> >   already a reference for the flow uuid 'S' in the existing desired
> >   flows and only append the actions if it doesn't exist.
> >
> > This patch has taken the approach (2) as that seems better and
> > effecient.
> >
> > Signed-off-by: Numan Siddique 
> > ---
> 
>  Hi Numan,
> 
>  Nice work tracking this down!
> 
>  This is not a full review, just an initial question to see if there's
>  also an option #3 to fix this issue:
> 
>  Can we try to avoid processing the same lflow twice?  E.g., in
>  lflow_add_flows_for_datapath(), call consider_logical_flow__() only for
>  logical flows that are not on the table's 'track_list', so have not
>  changed in the SB.
> >>>
> >>> Hi Dumitru,
> >>>
> >>> Thanks for the comments. I think this would require running  a for
> >>> loop of tracked
> >>> flows for every logical flow for the datapath and checking if it is in
> >>> the tracked list or not.
> >>
> >> OVS IDL doesn't explicitly expose an API to check if a record has been
> >> changed (i.e., is on the track list).  We could add one, and use it in
> >> lflow_add_flows_for_datapath() when iterating over lflows, to just skip
> >> records that were marked as updates already.
> >>
> >>> I think we could do this too. But in my opinion we should support
> >>> ofctrl_add_or_append_flow() to be called twice for the same uuid.
> >>
> >> I'm not sure about this.  Why should 

Re: [ovs-dev] [PATCH v3 ovn] controller: introduce coverage counters for ovn-controller incremental processing

2021-02-22 Thread Kevin Traynor
On 19/02/2021 14:26, Lorenzo Bianconi wrote:
> Introduce inc-engine/stats ovs-applctl command in order to dump
> ovn-controller incremental processing engine statistics. So far for each
> node a counter for run, abort and engine_handler have been added.
> Counters are incremented when the node move to "updated" state.
> In order to dump I-P stats we can can use the following commands:
> 
> $ovs-appctl -t ovn-controller inc-engine/stats
> SB_address_set
> run 1   abort   0   handler 0
> addr_sets
> run 2   abort   1   handler 0
> SB_port_group
> run 0   abort   0   handler 0
> port_groups
> run 2   abort   0   handler 0
> ofctrl_is_connected
> run 1   abort   0   handler 0
> OVS_open_vswitch
> run 0   abort   0   handler 0
> OVS_bridge
> run 0   abort   0   handler 0
> OVS_qos
> run 0   abort   0   handler 0
> SB_chassis
> run 1   abort   0   handler 0
> 
> 
> flow_output
> run 2   abort   0   handler 34
> 
> Morover introduce the inc-engine/stats-clear command to reset engine
> statistics
> 
> $ovs-appctl -t ovn-controller inc-engine/stats-clear
> 

Hi Lorenzo,

One disadvantage of a clear/show type stat is you don't have builtin
rate information like coverage counters and if you get an offline
report, you don't know how long it has been since the counters were
cleared, or if they were ever cleared.

You could consider to embed duration info into the show command. For
example, 'pmd-perf-show' command shows e.g.

$ ovs-appctl dpif-netdev/pmd-perf-show
Time: 14:07:57.468
Measurement duration: 4.480 s



For the pmd case, I'm not sure that wall clock is really useful, but
measurement duration (time since last clear) is.

Kevin.


> Signed-off-by: Lorenzo Bianconi 
> ---
> Changes since v2:
> - introduce inc-engine/stats and inc-engine/stats-clear commands and drop
>   COVERAGE_* dependency
> 
> Changes since v1:
> - drop handler counters and add global abort counter
> - improve documentation and naming scheme
> - introduce engine_set_node_updated utility macro
> ---
>  controller/ovn-controller.c | 53 ++---
>  lib/inc-proc-eng.c  | 48 +
>  lib/inc-proc-eng.h  | 26 ++
>  3 files changed, 106 insertions(+), 21 deletions(-)
> 
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 4343650fc..a937803c8 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1011,7 +1011,7 @@ en_ofctrl_is_connected_run(struct engine_node *node, 
> void *data)
>  ofctrl_seqno_flush();
>  binding_seqno_flush();
>  }
> -engine_set_node_state(node, EN_UPDATED);
> +engine_set_note_update_from_run(node);
>  return;
>  }
>  engine_set_node_state(node, EN_UNCHANGED);
> @@ -1067,7 +1067,7 @@ en_addr_sets_run(struct engine_node *node, void *data)
>  addr_sets_init(as_table, &as->addr_sets);
>  
>  as->change_tracked = false;
> -engine_set_node_state(node, EN_UPDATED);
> +engine_set_note_update_from_run(node);
>  }
>  
>  static bool
> @@ -1088,7 +1088,7 @@ addr_sets_sb_address_set_handler(struct engine_node 
> *node, void *data)
>  
>  if (!sset_is_empty(&as->new) || !sset_is_empty(&as->deleted) ||
>  !sset_is_empty(&as->updated)) {
> -engine_set_node_state(node, EN_UPDATED);
> +engine_set_note_update_from_handler(node);
>  } else {
>  engine_set_node_state(node, EN_UNCHANGED);
>  }
> @@ -1147,7 +1147,7 @@ en_port_groups_run(struct engine_node *node, void *data)
>  port_groups_init(pg_table, &pg->port_groups);
>  
>  pg->change_tracked = false;
> -engine_set_node_state(node, EN_UPDATED);
> +engine_set_note_update_from_run(node);
>  }
>  
>  static bool
> @@ -1168,7 +1168,7 @@ port_groups_sb_port_group_handler(struct engine_node 
> *node, void *data)
>  
>  if (!sset_is_empty(&pg->new) || !sset_is_empty(&pg->deleted) ||
>  !sset_is_empty(&pg->updated)) {
> -engine_set_node_state(node, EN_UPDATED);
> +engine_set_note_update_from_handler(node);
>  } else {
>  engine_set_node_state(node, EN_UNCHANGED);
>  }
> @@ -1482,7 +1482,7 @@ en_runtime_data_run(struct engine_node *node, void 
> *data)
>  
>  binding_run(&b_ctx_in, &b_ctx_out);
>  
> -engine_set_node_state(node, EN_UPDATED);
> +engine_set_note_update_from_run(node);
>  }
>  
>  static bool
> @@ -1500,7 +1500,7 @@ runtime_data_ovs_interface_handler(struct engine_node 
> *node, void *data)
>  }
>  
>  if (b_ctx_out.local_lports_changed) {
> -engine_set_node_state(node, EN_UPDATED);
> +engine_set_note_update_from_handler(node);
>  rt_data->local_lports_changed = b_ctx_out.local_lports_changed;
>  }
>  
> @@ -

Re: [ovs-dev] [PATCH v3 ovn] controller: introduce coverage counters for ovn-controller incremental processing

2021-02-22 Thread Mark Gray
On 22/02/2021 12:03, Lorenzo Bianconi wrote:
>> On 2/19/21 3:26 PM, Lorenzo Bianconi wrote:
>>> Introduce inc-engine/stats ovs-applctl command in order to dump
>>> ovn-controller incremental processing engine statistics. So far for each
>>> node a counter for run, abort and engine_handler have been added.
>>> Counters are incremented when the node move to "updated" state.
>>> In order to dump I-P stats we can can use the following commands:
>>>
>>> $ovs-appctl -t ovn-controller inc-engine/stats
>>> SB_address_set
>>>  run 1   abort   0   handler 0
>>> addr_sets
>>>  run 2   abort   1   handler 0
>>> SB_port_group
>>>  run 0   abort   0   handler 0
>>> port_groups
>>>  run 2   abort   0   handler 0
>>> ofctrl_is_connected
>>>  run 1   abort   0   handler 0
>>> OVS_open_vswitch
>>>  run 0   abort   0   handler 0
>>> OVS_bridge
>>>  run 0   abort   0   handler 0
>>> OVS_qos
>>>  run 0   abort   0   handler 0
>>> SB_chassis
>>>  run 1   abort   0   handler 0
>>> 
>>>
>>> flow_output
>>>  run 2   abort   0   handler 34
>>>
>>> Morover introduce the inc-engine/stats-clear command to reset engine
>>> statistics
>>>
>>> $ovs-appctl -t ovn-controller inc-engine/stats-clear
>>>
>>> Signed-off-by: Lorenzo Bianconi 
>>> ---
>>
>> Hi Lorenzo,
>>
>> Thanks for working on this!
>>
> 
> Hi Dumitru,
> 
> thx for the review.
> 
>> The commit title needs to be updated as we're not adding coverage counters
>> anymore.
>>
> 
> ack, I will fix it in v4.
> 
>>> Changes since v2:
>>> - introduce inc-engine/stats and inc-engine/stats-clear commands and drop
>>>COVERAGE_* dependency
>>>
>>> Changes since v1:
>>> - drop handler counters and add global abort counter
>>> - improve documentation and naming scheme
>>> - introduce engine_set_node_updated utility macro
>>> ---
>>>   controller/ovn-controller.c | 53 ++---
>>>   lib/inc-proc-eng.c  | 48 +
>>>   lib/inc-proc-eng.h  | 26 ++
>>>   3 files changed, 106 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>>> index 4343650fc..a937803c8 100644
>>> --- a/controller/ovn-controller.c
>>> +++ b/controller/ovn-controller.c
>>> @@ -1011,7 +1011,7 @@ en_ofctrl_is_connected_run(struct engine_node *node, 
>>> void *data)
>>>   ofctrl_seqno_flush();
>>>   binding_seqno_flush();
>>>   }
>>> -engine_set_node_state(node, EN_UPDATED);
>>> +engine_set_note_update_from_run(node);
>>>   return;
>>>   }
>>>   engine_set_node_state(node, EN_UNCHANGED);
>>> @@ -1067,7 +1067,7 @@ en_addr_sets_run(struct engine_node *node, void *data)
>>>   addr_sets_init(as_table, &as->addr_sets);
>>>   as->change_tracked = false;
>>> -engine_set_node_state(node, EN_UPDATED);
>>> +engine_set_note_update_from_run(node);
>>
>> This seems error prone to me, what if a new engine node is added and the
>> wrong macro (or no macro) is used to set the state?
>>
>> It's probably simpler to increment the stats in the inc-proc engine
>> implementation directly:
>>
>> - change handlers are only called in one place, engine_compute(): if a
>> change handler sets the node state to EN_UPDATED we can increment the node's
>> change handler stats.
>> - run handlers are only called in one place, engine_recompute(): if a node
>> handler sets the node state to EN_UPDATED we can increment the node's run
>> handler stats.
> 
> ack, the code is much simpler doing so, thx :) I will fix it in v4.

I made a suggestion in v4, I think we should count the state at the end
of each engine_node_run() invocation (as this will count the final state
of each node).

I also suggestion counting the number of times we "compute" (i.e.
attempt an incremental_compute) and "recompute". Dumitru, what do you
think - as this goes against what you are suggestion?
>>
>>>   }
>>>   static bool
>>> @@ -1088,7 +1088,7 @@ addr_sets_sb_address_set_handler(struct engine_node 
>>> *node, void *data)
>>>   if (!sset_is_empty(&as->new) || !sset_is_empty(&as->deleted) ||
>>>   !sset_is_empty(&as->updated)) {
>>> -engine_set_node_state(node, EN_UPDATED);
>>> +engine_set_note_update_from_handler(node);
>>>   } else {
>>>   engine_set_node_state(node, EN_UNCHANGED);
>>>   }
>>> @@ -1147,7 +1147,7 @@ en_port_groups_run(struct engine_node *node, void 
>>> *data)
>>>   port_groups_init(pg_table, &pg->port_groups);
>>>   pg->change_tracked = false;
>>> -engine_set_node_state(node, EN_UPDATED);
>>> +engine_set_note_update_from_run(node);
>>>   }
>>>   static bool
>>> @@ -1168,7 +1168,7 @@ port_groups_sb_port_group_handler(struct engine_node 
>>> *node, void *data)
>>>   if (!sset_is_e

Re: [ovs-dev] [PATCH v4 ovn] controller: introduce stats counters for ovn-controller incremental processing

2021-02-22 Thread Mark Gray
On 22/02/2021 12:15, Lorenzo Bianconi wrote:

Thanks Lorenzo, I think the change to unixctl commands looks good. Some
more comments below.

> Introduce inc-engine/stats ovs-applctl command in order to dump

nit: inc-engine/show-stats or clear-stats?

and

s/applctl/appctl

> ovn-controller incremental processing engine statistics. So far for each
> node a counter for run, abort and engine_handler have been added.
> Counters are incremented when the node move to "updated" state.
> In order to dump I-P stats we can can use the following commands:
> 
> $ovs-appctl -t ovn-controller inc-engine/stats
> SB_address_set
> run 1   abort   0   change-handler 0
> addr_sets
> run 2   abort   1   change-handler 0
> SB_port_group
> run 0   abort   0   change-handler 0
> port_groups
> run 2   abort   0   change-handler 0
> ofctrl_is_connected
> run 1   abort   0   change-handler 0
> OVS_open_vswitch
> run 0   abort   0   change-handler 0
> OVS_bridge
> run 0   abort   0   change-handler 0
> OVS_qos
> run 0   abort   0   change-handler 0
> SB_chassis
> run 1   abort   0   change-handler 0
> 
> 
> flow_output
> run 2   abort   0   change-handler 34
> 
> Morover introduce the inc-engine/stats-clear command to reset engine

s/morover/moreover
> statistics
> 
> $ovs-appctl -t ovn-controller inc-engine/stats-clear
> 
> Signed-off-by: Lorenzo Bianconi 
> ---
> Changes since v3:
> - drop engine_set_note_update_from_run/engine_set_note_update_from_handler
>   macros and move stats code in lib/inc-proc-eng.c
> - fix commit log
> Changes since v2:
> - introduce inc-engine/stats and inc-engine/stats-clear commands and drop
>   COVERAGE_* dependency
> Changes since v1:
> - drop handler counters and add global abort counter
> - improve documentation and naming scheme
> - introduce engine_set_node_updated utility macro
> ---
>  controller/ovn-controller.c |  4 
>  lib/inc-proc-eng.c  | 35 +++
>  lib/inc-proc-eng.h  | 18 ++
>  3 files changed, 57 insertions(+)
> 
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 5dd643f52..52e7b1932 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -2710,6 +2710,10 @@ main(int argc, char *argv[])
>  
>  unixctl_command_register("recompute", "", 0, 0, engine_recompute_cmd,
>   NULL);
> +unixctl_command_register("inc-engine/stats", "", 0, 0, engine_dump_stats,
> + NULL);
> +unixctl_command_register("inc-engine/stats-clear", "", 0, 0,
> + engine_clear_stats, NULL);
>  unixctl_command_register("lflow-cache/flush", "", 0, 0,
>   lflow_cache_flush_cmd,
>   &flow_output_data->pd);

Do we have to call this here in ovn-controller.c? Can we not call them
in inc-proc-eng.c. We should try to decouple this entirely from
ovn-controller.c. I think we can implement nearly the entire
functionality in inc-proc-eng.c
> diff --git a/lib/inc-proc-eng.c b/lib/inc-proc-eng.c
> index 916dbbe39..facd59e5b 100644
> --- a/lib/inc-proc-eng.c
> +++ b/lib/inc-proc-eng.c
> @@ -283,11 +283,13 @@ engine_recompute(struct engine_node *node, bool forced, 
> bool allowed)
>  if (!allowed) {
>  VLOG_DBG("node: %s, recompute aborted", node->name);
>  engine_set_node_state(node, EN_ABORTED);
> +node->stats.abort++;

We could move this to engine_run() doing something like this:

for (size_t i = 0; i < engine_n_nodes; i++) {
engine_run_node(engine_nodes[i], recompute_allowed);

switch(engine_node[i]->state) {
case EN_ABORTED:
engine_node[i]->stats.abort++;
break;
case EN_UPDATED: // <- we could add other states
engine_node[i]->stats.updated++;
break;
case EN_UNCHANGED: // <- we could add other states
engine_node[i]->stats.unchanged++;
break;
default:
/* Should not reach here */
ovs_assert();
}
}

if (engine_nodes[i]->state == EN_ABORTED) {
engine_run_aborted = true;
return;

The reason that I suggest this is that, at this point (i think), we have
finished processing the node for this run so we are logging the absolute
final state. Also, i think it is easy to count other states (as above)

What do you think?
>  return;
>  }
>  
>  /* Run the node handler which might change state. */
>  node->run(node, node->data);
> +node->stats.run++;


>  }
>  
>  /* Return true if the node could be computed, false otherwise. */
> @@ -310,6 +312,7 @@ engine_compute(struct engine_nod

Re: [ovs-dev] [PATCHv2] connmgr: Check nullptr inside ofmonitor_report()

2021-02-22 Thread 贺鹏
Hi, Ilya and William,

Ilya Maximets  于2021年2月22日周一 下午8:12写道:
>
> On 2/21/21 5:11 PM, William Tu wrote:
> > On Fri, Feb 19, 2021 at 6:29 PM 贺鹏  wrote:
> >>
> >> Hi, Ilya
> >>
> >> Ilya Maximets  于2021年2月19日周五 下午7:19写道:
> >>>
> >>> On 2/19/21 3:12 AM, 贺鹏 wrote:
>  Hi,
> 
>  Looks like this bug is caused by violating the fact that if a rule is
>  referenced, the related ofproto should not be destroyed.
> 
>  If so, I have a patch that also fixes the problem, not sure if this 
>  helps.
> 
>  http://patchwork.ozlabs.org/project/openvswitch/patch/20200717015041.82746-1-hepeng.0...@bytedance.com/
> >>>
> >>> There is at least one more problem that is not strictly related but
> >>> in more or less the same part of the code:
> >>>   https://mail.openvswitch.org/pipermail/ovs-dev/2021-February/380582.html
> >>
> >> So maybe before add it into *ovsrcu_postpone*, we should add refcount
> >> of ofproto also?
> >>
> > Yes, I think that will fix the issue. But are we able to find out all the
> > places that we need to add refcount of ofproto?

Yes, this could be difficult.

>
> destruct() is called for ofproto_dpif unconditionally without any deferring,
> so this will not help unless we're delaying the the destruct() itself, and
> I'm not sure if we can do that.
>
> >
> > Looks like we might have multiple rcu postponed function that might
> > access the 'ofproto'. And 'ofproto' might be freed already and cause 
> > segfault.
> >
> > Hepeng's patch fixes two places.
> >   
> > http://patchwork.ozlabs.org/project/openvswitch/patch/20200717015041.82746-1-hepeng.0...@bytedance.com/
> > Ilya pointed out another place
> >   https://mail.openvswitch.org/pipermail/ovs-dev/2021-February/380582.html
> > yifeng's case is a little different (not due to ofproto = NULL, but
> > due to setting
> > the p->connmgr = NULL before postponed)
>
> In my case ofproto is alive too.  The problem is destroyed ofproto->baker.
> And in case of meter_id we don't really need to refcount the ofproto itself.
> 'baker' already has a refcount, so we could increase it and pass 'baker'
> pointer to free_meter_id instead of 'ofproto'.  This function doesn't
> actually need an 'ofproto' pointer.
>
> Regarding this connmgr related patch... It's still valid even with refcounts
> because destruction of connmgr is explicitly not deferred for a reason.  It's
> to free the listening socket that users might want to reuse.  Refcounts will
> not help here.
> Also for the meters related issue.. destruct() is called for ofproto_dpif
> without any postponing and I'm not sure if we need or actually able to
> postpone it without consequences.  So, baker->refcount might be a better
> solution keeping the ofproto->refcount only for rules.  This will reduce
> the scope of changes, otherwise we will have to do a lot more work tracking
> down all the users that holds 'ofproto' pointer and will miss some of them
> with high probability.

Agree.
I recheck the patch. It's a better idea to use ofproto->backer's refcount.
But in the rule case, looks like the rule->ofproto needs to be alive
to access both ofproto->vl_mff_map and ofproto->ofproto_class.

 static void
 rule_destroy_cb(struct rule *rule)
 OVS_NO_THREAD_SAFETY_ANALYSIS
 {
 /* Send rule removed if needed. */
 if (rule->flags & OFPUTIL_FF_SEND_FLOW_REM
 && rule->removed_reason != OVS_OFPRR_NONE
 && !rule_is_hidden(rule)) {
 ofproto_rule_send_removed(rule);
 }
 rule->ofproto->ofproto_class->rule_destruct(rule);
 mf_vl_mff_unref(&rule->ofproto->vl_mff_map, rule->match_tlv_bitmap);
 mf_vl_mff_unref(&rule->ofproto->vl_mff_map, rule->ofpacts_tlv_bitmap);
 ofproto_rule_destroy__(rule);
 }

Maybe add refcount into vl_mff_map and ofproto_class?

>
> I'll recheck this (connmgr) patch and, likely, apply it.   Will also review
> ofproto refcount patch.
>
> Best regards, Ilya Maximets.



-- 
hepeng
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] ofctrl: Don't append actions for conj flows if called twice for same flow uuid.

2021-02-22 Thread Dumitru Ceara

On 2/22/21 11:58 AM, Numan Siddique wrote:

On Mon, Feb 22, 2021 at 3:13 PM Dumitru Ceara  wrote:


On 2/22/21 10:23 AM, Numan Siddique wrote:

On Mon, Feb 22, 2021 at 2:17 PM Dumitru Ceara  wrote:


On 2/21/21 12:34 PM, num...@ovn.org wrote:

From: Numan Siddique 

In one of the scaled deployments, ovn-controller is asserting with the
below stack trace

   ***
(gdb) bt
  0  raise () from /lib64/libc.so.6
  1  abort () from /lib64/libc.so.6
  2  ovs_abort_valist ("%s: assertion %s failed in %s()") at 
lib/util.c:419
  3  vlog_abort_valist ("%s: assertion %s failed in %s()") at 
lib/vlog.c:1249
  4  vlog_abort ("%s: assertion %s failed in %s()") at lib/vlog.c:1263
  5  ovs_assert_failure (where="controller/ofctrl.c:1216",
 function="flood_remove_flows_for_sb_uuid",
 condition="ovs_list_is_empty(&f->list_node)") 
at lib/util.c:86
  6  flood_remove_flows_for_sb_uuid (sb_uuid=...134,
   flood_remove_nodes=...ef0) at controller/ofctrl.c:1216
  9  ofctrl_flood_remove_flows (flood_remove_nodes=...ef0) at 
controller/ofctrl.c:1280
  10 lflow_handle_changed_ref (ref_type=REF_TYPE_PORTGROUP,
   ref_name= "5564_pg_14...aac") at controller/lflow.c:522
  11 _flow_output_resource_ref_handler (ref_type=REF_TYPE_PORTGROUP)
   at controller/ovn-controller.c:2220
  12 engine_compute () at lib/inc-proc-eng.c:306
  13 engine_run_node (recompute_allowed=true) at lib/inc-proc-eng.c:352
  14 engine_run (recompute_allowed=true) at lib/inc-proc-eng.c:377
  15 main () at controller/ovn-controller.c:2887
   ***

The reason for this assert is different from the assertion bug fixed in
the commit 858d1dd716("ofctrl: Fix the assert seen when flood removing
flows.")

The above assertion is seen if lflow.c calls ofctrl_add_or_append_flow()
twice for the same flow uuid 'S'.  When this function is called for
the first time, a new desired flow 'f' is created and 'f->references'
will have [(S, f)].  When it is called for the 2nd time, the list
'f->references' is updated as [(S, f), (S,f)].  Later when
flood_remove_flows_for_sb_uuid() is called for 'S', the above assertion
is seen.

This scenarion can happen when ovn-controller receives an update message
from SB ovsdb-server in which a new logical flow 'F' belonging to a
datapath 'D' is added which would result in conjunction flows.  If this
datapath 'D' is added to 'local_datapaths' in the same loop, then
logical flow 'F' is first processed in lflow_add_flows_for_datapath()
and the second time in lflow_handle_changed_flows().

This issue can be fixed in 2 ways
1. Flood remove all the tracked flows in lflow_handle_changed_flows()
  first and then process the modified or newly added logical flows.

2. In the function ofctrl_add_or_append_flow(), check if there is
  already a reference for the flow uuid 'S' in the existing desired
  flows and only append the actions if it doesn't exist.

This patch has taken the approach (2) as that seems better and
effecient.

Signed-off-by: Numan Siddique 
---


Hi Numan,

Nice work tracking this down!

This is not a full review, just an initial question to see if there's
also an option #3 to fix this issue:

Can we try to avoid processing the same lflow twice?  E.g., in
lflow_add_flows_for_datapath(), call consider_logical_flow__() only for
logical flows that are not on the table's 'track_list', so have not
changed in the SB.


Hi Dumitru,

Thanks for the comments. I think this would require running  a for
loop of tracked
flows for every logical flow for the datapath and checking if it is in
the tracked list or not.


OVS IDL doesn't explicitly expose an API to check if a record has been
changed (i.e., is on the track list).  We could add one, and use it in
lflow_add_flows_for_datapath() when iterating over lflows, to just skip
records that were marked as updates already.


I think we could do this too. But in my opinion we should support
ofctrl_add_or_append_flow() to be called twice for the same uuid.


I'm not sure about this.  Why should we support adding the same flow
twice?  It seems like a bug to me.


Sorry for the confusion.  What I mean is if the user calls it twice, we
should warn about it and return.

Like how we do here -
https://github.com/ovn-org/ovn/blob/master/controller/ofctrl.c#L1037
in ofctrl_check_and_add_flow().





When ofctrl_add_or_append_flow() is called twice, it should result in a no-op.


What if we implement (3) and also add an assert or check in
link_flow_to_sb() to not allow the same flow to be added twice to a SB uuid?



We could end up in a similar scenario later when incrementally processing
other changes.


As far as I see, we currently call ofctrl_add_or_append_flow() only for
logical flows so, at least for now, no other incrementally processed SB
record types can cause this issue.



[ovs-dev] [PATCH v4 ovn] controller: introduce stats counters for ovn-controller incremental processing

2021-02-22 Thread Lorenzo Bianconi
Introduce inc-engine/stats ovs-applctl command in order to dump
ovn-controller incremental processing engine statistics. So far for each
node a counter for run, abort and engine_handler have been added.
Counters are incremented when the node move to "updated" state.
In order to dump I-P stats we can can use the following commands:

$ovs-appctl -t ovn-controller inc-engine/stats
SB_address_set
run 1   abort   0   change-handler 0
addr_sets
run 2   abort   1   change-handler 0
SB_port_group
run 0   abort   0   change-handler 0
port_groups
run 2   abort   0   change-handler 0
ofctrl_is_connected
run 1   abort   0   change-handler 0
OVS_open_vswitch
run 0   abort   0   change-handler 0
OVS_bridge
run 0   abort   0   change-handler 0
OVS_qos
run 0   abort   0   change-handler 0
SB_chassis
run 1   abort   0   change-handler 0


flow_output
run 2   abort   0   change-handler 34

Morover introduce the inc-engine/stats-clear command to reset engine
statistics

$ovs-appctl -t ovn-controller inc-engine/stats-clear

Signed-off-by: Lorenzo Bianconi 
---
Changes since v3:
- drop engine_set_note_update_from_run/engine_set_note_update_from_handler
  macros and move stats code in lib/inc-proc-eng.c
- fix commit log
Changes since v2:
- introduce inc-engine/stats and inc-engine/stats-clear commands and drop
  COVERAGE_* dependency
Changes since v1:
- drop handler counters and add global abort counter
- improve documentation and naming scheme
- introduce engine_set_node_updated utility macro
---
 controller/ovn-controller.c |  4 
 lib/inc-proc-eng.c  | 35 +++
 lib/inc-proc-eng.h  | 18 ++
 3 files changed, 57 insertions(+)

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 5dd643f52..52e7b1932 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -2710,6 +2710,10 @@ main(int argc, char *argv[])
 
 unixctl_command_register("recompute", "", 0, 0, engine_recompute_cmd,
  NULL);
+unixctl_command_register("inc-engine/stats", "", 0, 0, engine_dump_stats,
+ NULL);
+unixctl_command_register("inc-engine/stats-clear", "", 0, 0,
+ engine_clear_stats, NULL);
 unixctl_command_register("lflow-cache/flush", "", 0, 0,
  lflow_cache_flush_cmd,
  &flow_output_data->pd);
diff --git a/lib/inc-proc-eng.c b/lib/inc-proc-eng.c
index 916dbbe39..facd59e5b 100644
--- a/lib/inc-proc-eng.c
+++ b/lib/inc-proc-eng.c
@@ -283,11 +283,13 @@ engine_recompute(struct engine_node *node, bool forced, 
bool allowed)
 if (!allowed) {
 VLOG_DBG("node: %s, recompute aborted", node->name);
 engine_set_node_state(node, EN_ABORTED);
+node->stats.abort++;
 return;
 }
 
 /* Run the node handler which might change state. */
 node->run(node, node->data);
+node->stats.run++;
 }
 
 /* Return true if the node could be computed, false otherwise. */
@@ -310,6 +312,7 @@ engine_compute(struct engine_node *node, bool 
recompute_allowed)
 engine_recompute(node, false, recompute_allowed);
 return (node->state != EN_ABORTED);
 }
+node->stats.change_handler++;
 }
 }
 return true;
@@ -401,3 +404,35 @@ engine_need_run(void)
 }
 return false;
 }
+
+void
+engine_clear_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
+   const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED)
+{
+for (size_t i = 0; i < engine_n_nodes; i++) {
+struct engine_node *node = engine_nodes[i];
+
+memset(&node->stats, 0, sizeof(node->stats));
+}
+unixctl_command_reply(conn, NULL);
+}
+
+void
+engine_dump_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
+  const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED)
+{
+struct ds dump = DS_EMPTY_INITIALIZER;
+
+for (size_t i = 0; i < engine_n_nodes; i++) {
+struct engine_node *node = engine_nodes[i];
+
+ds_put_format(&dump, "%s\n", node->name);
+ds_put_format(&dump, "\trun\t%lu", node->stats.run);
+ds_put_format(&dump, "\tabort\t%lu", node->stats.abort);
+ds_put_format(&dump, "\tchange-handler\t%lu\n",
+  node->stats.change_handler);
+}
+unixctl_command_reply(conn, ds_cstr(&dump));
+
+ds_destroy(&dump);
+}
diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
index 857234677..bc8744a0d 100644
--- a/lib/inc-proc-eng.h
+++ b/lib/inc-proc-eng.h
@@ -60,6 +60,8 @@
  * against all its inputs.
  */
 
+#include "unixctl.h"
+
 #define ENGINE_MAX_INPUT 256
 #define ENGINE_MAX_OVSDB_INDEX 256
 
@@ -107,6 +109,12 @@ enum engine_node_state {

Re: [ovs-dev] [PATCHv2] connmgr: Check nullptr inside ofmonitor_report()

2021-02-22 Thread Ilya Maximets
On 2/21/21 5:11 PM, William Tu wrote:
> On Fri, Feb 19, 2021 at 6:29 PM 贺鹏  wrote:
>>
>> Hi, Ilya
>>
>> Ilya Maximets  于2021年2月19日周五 下午7:19写道:
>>>
>>> On 2/19/21 3:12 AM, 贺鹏 wrote:
 Hi,

 Looks like this bug is caused by violating the fact that if a rule is
 referenced, the related ofproto should not be destroyed.

 If so, I have a patch that also fixes the problem, not sure if this helps.

 http://patchwork.ozlabs.org/project/openvswitch/patch/20200717015041.82746-1-hepeng.0...@bytedance.com/
>>>
>>> There is at least one more problem that is not strictly related but
>>> in more or less the same part of the code:
>>>   https://mail.openvswitch.org/pipermail/ovs-dev/2021-February/380582.html
>>
>> So maybe before add it into *ovsrcu_postpone*, we should add refcount
>> of ofproto also?
>>
> Yes, I think that will fix the issue. But are we able to find out all the
> places that we need to add refcount of ofproto?

destruct() is called for ofproto_dpif unconditionally without any deferring,
so this will not help unless we're delaying the the destruct() itself, and
I'm not sure if we can do that.

> 
> Looks like we might have multiple rcu postponed function that might
> access the 'ofproto'. And 'ofproto' might be freed already and cause segfault.
> 
> Hepeng's patch fixes two places.
>   
> http://patchwork.ozlabs.org/project/openvswitch/patch/20200717015041.82746-1-hepeng.0...@bytedance.com/
> Ilya pointed out another place
>   https://mail.openvswitch.org/pipermail/ovs-dev/2021-February/380582.html
> yifeng's case is a little different (not due to ofproto = NULL, but
> due to setting
> the p->connmgr = NULL before postponed)

In my case ofproto is alive too.  The problem is destroyed ofproto->baker.
And in case of meter_id we don't really need to refcount the ofproto itself.
'baker' already has a refcount, so we could increase it and pass 'baker'
pointer to free_meter_id instead of 'ofproto'.  This function doesn't
actually need an 'ofproto' pointer.

Regarding this connmgr related patch... It's still valid even with refcounts
because destruction of connmgr is explicitly not deferred for a reason.  It's
to free the listening socket that users might want to reuse.  Refcounts will
not help here.
Also for the meters related issue.. destruct() is called for ofproto_dpif
without any postponing and I'm not sure if we need or actually able to
postpone it without consequences.  So, baker->refcount might be a better
solution keeping the ofproto->refcount only for rules.  This will reduce
the scope of changes, otherwise we will have to do a lot more work tracking
down all the users that holds 'ofproto' pointer and will miss some of them
with high probability.

I'll recheck this (connmgr) patch and, likely, apply it.   Will also review
ofproto refcount patch.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 ovn] controller: introduce coverage counters for ovn-controller incremental processing

2021-02-22 Thread Lorenzo Bianconi
> On 2/19/21 3:26 PM, Lorenzo Bianconi wrote:
> > Introduce inc-engine/stats ovs-applctl command in order to dump
> > ovn-controller incremental processing engine statistics. So far for each
> > node a counter for run, abort and engine_handler have been added.
> > Counters are incremented when the node move to "updated" state.
> > In order to dump I-P stats we can can use the following commands:
> > 
> > $ovs-appctl -t ovn-controller inc-engine/stats
> > SB_address_set
> >  run 1   abort   0   handler 0
> > addr_sets
> >  run 2   abort   1   handler 0
> > SB_port_group
> >  run 0   abort   0   handler 0
> > port_groups
> >  run 2   abort   0   handler 0
> > ofctrl_is_connected
> >  run 1   abort   0   handler 0
> > OVS_open_vswitch
> >  run 0   abort   0   handler 0
> > OVS_bridge
> >  run 0   abort   0   handler 0
> > OVS_qos
> >  run 0   abort   0   handler 0
> > SB_chassis
> >  run 1   abort   0   handler 0
> > 
> > 
> > flow_output
> >  run 2   abort   0   handler 34
> > 
> > Morover introduce the inc-engine/stats-clear command to reset engine
> > statistics
> > 
> > $ovs-appctl -t ovn-controller inc-engine/stats-clear
> > 
> > Signed-off-by: Lorenzo Bianconi 
> > ---
> 
> Hi Lorenzo,
> 
> Thanks for working on this!
> 

Hi Dumitru,

thx for the review.

> The commit title needs to be updated as we're not adding coverage counters
> anymore.
> 

ack, I will fix it in v4.

> > Changes since v2:
> > - introduce inc-engine/stats and inc-engine/stats-clear commands and drop
> >COVERAGE_* dependency
> > 
> > Changes since v1:
> > - drop handler counters and add global abort counter
> > - improve documentation and naming scheme
> > - introduce engine_set_node_updated utility macro
> > ---
> >   controller/ovn-controller.c | 53 ++---
> >   lib/inc-proc-eng.c  | 48 +
> >   lib/inc-proc-eng.h  | 26 ++
> >   3 files changed, 106 insertions(+), 21 deletions(-)
> > 
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 4343650fc..a937803c8 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -1011,7 +1011,7 @@ en_ofctrl_is_connected_run(struct engine_node *node, 
> > void *data)
> >   ofctrl_seqno_flush();
> >   binding_seqno_flush();
> >   }
> > -engine_set_node_state(node, EN_UPDATED);
> > +engine_set_note_update_from_run(node);
> >   return;
> >   }
> >   engine_set_node_state(node, EN_UNCHANGED);
> > @@ -1067,7 +1067,7 @@ en_addr_sets_run(struct engine_node *node, void *data)
> >   addr_sets_init(as_table, &as->addr_sets);
> >   as->change_tracked = false;
> > -engine_set_node_state(node, EN_UPDATED);
> > +engine_set_note_update_from_run(node);
> 
> This seems error prone to me, what if a new engine node is added and the
> wrong macro (or no macro) is used to set the state?
> 
> It's probably simpler to increment the stats in the inc-proc engine
> implementation directly:
> 
> - change handlers are only called in one place, engine_compute(): if a
> change handler sets the node state to EN_UPDATED we can increment the node's
> change handler stats.
> - run handlers are only called in one place, engine_recompute(): if a node
> handler sets the node state to EN_UPDATED we can increment the node's run
> handler stats.

ack, the code is much simpler doing so, thx :) I will fix it in v4.
> 
> >   }
> >   static bool
> > @@ -1088,7 +1088,7 @@ addr_sets_sb_address_set_handler(struct engine_node 
> > *node, void *data)
> >   if (!sset_is_empty(&as->new) || !sset_is_empty(&as->deleted) ||
> >   !sset_is_empty(&as->updated)) {
> > -engine_set_node_state(node, EN_UPDATED);
> > +engine_set_note_update_from_handler(node);
> >   } else {
> >   engine_set_node_state(node, EN_UNCHANGED);
> >   }
> > @@ -1147,7 +1147,7 @@ en_port_groups_run(struct engine_node *node, void 
> > *data)
> >   port_groups_init(pg_table, &pg->port_groups);
> >   pg->change_tracked = false;
> > -engine_set_node_state(node, EN_UPDATED);
> > +engine_set_note_update_from_run(node);
> >   }
> >   static bool
> > @@ -1168,7 +1168,7 @@ port_groups_sb_port_group_handler(struct engine_node 
> > *node, void *data)
> >   if (!sset_is_empty(&pg->new) || !sset_is_empty(&pg->deleted) ||
> >   !sset_is_empty(&pg->updated)) {
> > -engine_set_node_state(node, EN_UPDATED);
> > +engine_set_note_update_from_handler(node);
> >   } else {
> >   engine_set_node_state(node, EN_UNCHANGED);
> >   }
> > @@ -1482,7 +1482,7 @@ en_runtime_data_run(struct engine_node *node, void 
> > *data)
> >   binding_run(&b_ctx_in, &b_ct

Re: [ovs-dev] [dpdk-stable] [PATCH] doc: update stable section

2021-02-22 Thread Luca Boccassi
On Fri, 2021-02-19 at 10:23 +, Kevin Traynor wrote:
> Updating the docs to elaborate on the stable release
> characteristics and better document the current practice
> about new features in stable releases.
> 
> Signed-off-by: Kevin Traynor 
> ---
>  doc/guides/contributing/stable.rst | 33 --
>  1 file changed, 27 insertions(+), 6 deletions(-)

Acked-by: Luca Boccassi 

-- 
Kind regards,
Luca Boccassi
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] mac-learn: Fix build due to missing newline at EOF.

2021-02-22 Thread Numan Siddique
On Mon, Feb 22, 2021 at 3:27 PM Dumitru Ceara  wrote:
>
> Fixes: 2c5e546f3486 ("controller: Split mac learning code to a separate 
> file.")
> Signed-off-by: Dumitru Ceara 

Thanks for the fix. I applied this patch to master.

Numan

> ---
>  controller/mac-learn.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/controller/mac-learn.h b/controller/mac-learn.h
> index 7a7897e..e7e8ba2 100644
> --- a/controller/mac-learn.h
> +++ b/controller/mac-learn.h
> @@ -63,4 +63,4 @@ struct fdb_entry *ovn_fdb_add(struct hmap *fdbs,
>  uint32_t dp_key, struct eth_addr mac,
>  uint32_t port_key);
>
> -#endif /* OVN_MAC_LEARN_H */
> \ No newline at end of file
> +#endif /* OVN_MAC_LEARN_H */
> --
> 1.8.3.1
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] ofctrl: Don't append actions for conj flows if called twice for same flow uuid.

2021-02-22 Thread Numan Siddique
On Mon, Feb 22, 2021 at 3:13 PM Dumitru Ceara  wrote:
>
> On 2/22/21 10:23 AM, Numan Siddique wrote:
> > On Mon, Feb 22, 2021 at 2:17 PM Dumitru Ceara  wrote:
> >>
> >> On 2/21/21 12:34 PM, num...@ovn.org wrote:
> >>> From: Numan Siddique 
> >>>
> >>> In one of the scaled deployments, ovn-controller is asserting with the
> >>> below stack trace
> >>>
> >>>   ***
> >>>(gdb) bt
> >>>  0  raise () from /lib64/libc.so.6
> >>>  1  abort () from /lib64/libc.so.6
> >>>  2  ovs_abort_valist ("%s: assertion %s failed in %s()") at 
> >>> lib/util.c:419
> >>>  3  vlog_abort_valist ("%s: assertion %s failed in %s()") at 
> >>> lib/vlog.c:1249
> >>>  4  vlog_abort ("%s: assertion %s failed in %s()") at 
> >>> lib/vlog.c:1263
> >>>  5  ovs_assert_failure (where="controller/ofctrl.c:1216",
> >>> function="flood_remove_flows_for_sb_uuid",
> >>> 
> >>> condition="ovs_list_is_empty(&f->list_node)") at lib/util.c:86
> >>>  6  flood_remove_flows_for_sb_uuid (sb_uuid=...134,
> >>>   flood_remove_nodes=...ef0) at controller/ofctrl.c:1216
> >>>  9  ofctrl_flood_remove_flows (flood_remove_nodes=...ef0) at 
> >>> controller/ofctrl.c:1280
> >>>  10 lflow_handle_changed_ref (ref_type=REF_TYPE_PORTGROUP,
> >>>   ref_name= "5564_pg_14...aac") at controller/lflow.c:522
> >>>  11 _flow_output_resource_ref_handler 
> >>> (ref_type=REF_TYPE_PORTGROUP)
> >>>   at controller/ovn-controller.c:2220
> >>>  12 engine_compute () at lib/inc-proc-eng.c:306
> >>>  13 engine_run_node (recompute_allowed=true) at 
> >>> lib/inc-proc-eng.c:352
> >>>  14 engine_run (recompute_allowed=true) at lib/inc-proc-eng.c:377
> >>>  15 main () at controller/ovn-controller.c:2887
> >>>   ***
> >>>
> >>> The reason for this assert is different from the assertion bug fixed in
> >>> the commit 858d1dd716("ofctrl: Fix the assert seen when flood removing
> >>> flows.")
> >>>
> >>> The above assertion is seen if lflow.c calls ofctrl_add_or_append_flow()
> >>> twice for the same flow uuid 'S'.  When this function is called for
> >>> the first time, a new desired flow 'f' is created and 'f->references'
> >>> will have [(S, f)].  When it is called for the 2nd time, the list
> >>> 'f->references' is updated as [(S, f), (S,f)].  Later when
> >>> flood_remove_flows_for_sb_uuid() is called for 'S', the above assertion
> >>> is seen.
> >>>
> >>> This scenarion can happen when ovn-controller receives an update message
> >>> from SB ovsdb-server in which a new logical flow 'F' belonging to a
> >>> datapath 'D' is added which would result in conjunction flows.  If this
> >>> datapath 'D' is added to 'local_datapaths' in the same loop, then
> >>> logical flow 'F' is first processed in lflow_add_flows_for_datapath()
> >>> and the second time in lflow_handle_changed_flows().
> >>>
> >>> This issue can be fixed in 2 ways
> >>> 1. Flood remove all the tracked flows in lflow_handle_changed_flows()
> >>>  first and then process the modified or newly added logical flows.
> >>>
> >>> 2. In the function ofctrl_add_or_append_flow(), check if there is
> >>>  already a reference for the flow uuid 'S' in the existing desired
> >>>  flows and only append the actions if it doesn't exist.
> >>>
> >>> This patch has taken the approach (2) as that seems better and
> >>> effecient.
> >>>
> >>> Signed-off-by: Numan Siddique 
> >>> ---
> >>
> >> Hi Numan,
> >>
> >> Nice work tracking this down!
> >>
> >> This is not a full review, just an initial question to see if there's
> >> also an option #3 to fix this issue:
> >>
> >> Can we try to avoid processing the same lflow twice?  E.g., in
> >> lflow_add_flows_for_datapath(), call consider_logical_flow__() only for
> >> logical flows that are not on the table's 'track_list', so have not
> >> changed in the SB.
> >
> > Hi Dumitru,
> >
> > Thanks for the comments. I think this would require running  a for
> > loop of tracked
> > flows for every logical flow for the datapath and checking if it is in
> > the tracked list or not.
>
> OVS IDL doesn't explicitly expose an API to check if a record has been
> changed (i.e., is on the track list).  We could add one, and use it in
> lflow_add_flows_for_datapath() when iterating over lflows, to just skip
> records that were marked as updates already.
>
> > I think we could do this too. But in my opinion we should support
> > ofctrl_add_or_append_flow() to be called twice for the same uuid.
>
> I'm not sure about this.  Why should we support adding the same flow
> twice?  It seems like a bug to me.

Sorry for the confusion.  What I mean is if the user calls it twice, we
should warn about it and return.

Like how we do here -
https://github.com/ovn-org/ovn/blob/master/controller/ofctrl.c#L1037
in ofctrl_check_and_add_flow().


>
> > When ofctrl_add_or_append_flow() is called twice, it 

Re: [ovs-dev] [PATCH v3 ovn] controller: introduce coverage counters for ovn-controller incremental processing

2021-02-22 Thread Dumitru Ceara

On 2/19/21 3:26 PM, Lorenzo Bianconi wrote:

Introduce inc-engine/stats ovs-applctl command in order to dump
ovn-controller incremental processing engine statistics. So far for each
node a counter for run, abort and engine_handler have been added.
Counters are incremented when the node move to "updated" state.
In order to dump I-P stats we can can use the following commands:

$ovs-appctl -t ovn-controller inc-engine/stats
SB_address_set
 run 1   abort   0   handler 0
addr_sets
 run 2   abort   1   handler 0
SB_port_group
 run 0   abort   0   handler 0
port_groups
 run 2   abort   0   handler 0
ofctrl_is_connected
 run 1   abort   0   handler 0
OVS_open_vswitch
 run 0   abort   0   handler 0
OVS_bridge
 run 0   abort   0   handler 0
OVS_qos
 run 0   abort   0   handler 0
SB_chassis
 run 1   abort   0   handler 0


flow_output
 run 2   abort   0   handler 34

Morover introduce the inc-engine/stats-clear command to reset engine
statistics

$ovs-appctl -t ovn-controller inc-engine/stats-clear

Signed-off-by: Lorenzo Bianconi 
---


Hi Lorenzo,

Thanks for working on this!

The commit title needs to be updated as we're not adding coverage 
counters anymore.



Changes since v2:
- introduce inc-engine/stats and inc-engine/stats-clear commands and drop
   COVERAGE_* dependency

Changes since v1:
- drop handler counters and add global abort counter
- improve documentation and naming scheme
- introduce engine_set_node_updated utility macro
---
  controller/ovn-controller.c | 53 ++---
  lib/inc-proc-eng.c  | 48 +
  lib/inc-proc-eng.h  | 26 ++
  3 files changed, 106 insertions(+), 21 deletions(-)

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 4343650fc..a937803c8 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -1011,7 +1011,7 @@ en_ofctrl_is_connected_run(struct engine_node *node, void 
*data)
  ofctrl_seqno_flush();
  binding_seqno_flush();
  }
-engine_set_node_state(node, EN_UPDATED);
+engine_set_note_update_from_run(node);
  return;
  }
  engine_set_node_state(node, EN_UNCHANGED);
@@ -1067,7 +1067,7 @@ en_addr_sets_run(struct engine_node *node, void *data)
  addr_sets_init(as_table, &as->addr_sets);
  
  as->change_tracked = false;

-engine_set_node_state(node, EN_UPDATED);
+engine_set_note_update_from_run(node);


This seems error prone to me, what if a new engine node is added and the 
wrong macro (or no macro) is used to set the state?


It's probably simpler to increment the stats in the inc-proc engine 
implementation directly:


- change handlers are only called in one place, engine_compute(): if a 
change handler sets the node state to EN_UPDATED we can increment the 
node's change handler stats.
- run handlers are only called in one place, engine_recompute(): if a 
node handler sets the node state to EN_UPDATED we can increment the 
node's run handler stats.



  }
  
  static bool

@@ -1088,7 +1088,7 @@ addr_sets_sb_address_set_handler(struct engine_node 
*node, void *data)
  
  if (!sset_is_empty(&as->new) || !sset_is_empty(&as->deleted) ||

  !sset_is_empty(&as->updated)) {
-engine_set_node_state(node, EN_UPDATED);
+engine_set_note_update_from_handler(node);
  } else {
  engine_set_node_state(node, EN_UNCHANGED);
  }
@@ -1147,7 +1147,7 @@ en_port_groups_run(struct engine_node *node, void *data)
  port_groups_init(pg_table, &pg->port_groups);
  
  pg->change_tracked = false;

-engine_set_node_state(node, EN_UPDATED);
+engine_set_note_update_from_run(node);
  }
  
  static bool

@@ -1168,7 +1168,7 @@ port_groups_sb_port_group_handler(struct engine_node 
*node, void *data)
  
  if (!sset_is_empty(&pg->new) || !sset_is_empty(&pg->deleted) ||

  !sset_is_empty(&pg->updated)) {
-engine_set_node_state(node, EN_UPDATED);
+engine_set_note_update_from_handler(node);
  } else {
  engine_set_node_state(node, EN_UNCHANGED);
  }
@@ -1482,7 +1482,7 @@ en_runtime_data_run(struct engine_node *node, void *data)
  
  binding_run(&b_ctx_in, &b_ctx_out);
  
-engine_set_node_state(node, EN_UPDATED);

+engine_set_note_update_from_run(node);
  }
  
  static bool

@@ -1500,7 +1500,7 @@ runtime_data_ovs_interface_handler(struct engine_node 
*node, void *data)
  }
  
  if (b_ctx_out.local_lports_changed) {

-engine_set_node_state(node, EN_UPDATED);
+engine_set_note_update_from_handler(node);
  rt_data->local_lports_changed = b_ctx_out.local_lports_changed;
  }
  
@@ -1528,7 +1528,7 @@ runtime_data_sb_port_binding_handler(struct engine_node

[ovs-dev] [PATCH ovn] mac-learn: Fix build due to missing newline at EOF.

2021-02-22 Thread Dumitru Ceara
Fixes: 2c5e546f3486 ("controller: Split mac learning code to a separate file.")
Signed-off-by: Dumitru Ceara 
---
 controller/mac-learn.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/controller/mac-learn.h b/controller/mac-learn.h
index 7a7897e..e7e8ba2 100644
--- a/controller/mac-learn.h
+++ b/controller/mac-learn.h
@@ -63,4 +63,4 @@ struct fdb_entry *ovn_fdb_add(struct hmap *fdbs,
 uint32_t dp_key, struct eth_addr mac,
 uint32_t port_key);
 
-#endif /* OVN_MAC_LEARN_H */
\ No newline at end of file
+#endif /* OVN_MAC_LEARN_H */
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] ofctrl: Don't append actions for conj flows if called twice for same flow uuid.

2021-02-22 Thread Dumitru Ceara

On 2/22/21 10:37 AM, Numan Siddique wrote:

On Mon, Feb 22, 2021 at 2:53 PM Numan Siddique  wrote:


On Mon, Feb 22, 2021 at 2:17 PM Dumitru Ceara  wrote:


On 2/21/21 12:34 PM, num...@ovn.org wrote:

From: Numan Siddique 

In one of the scaled deployments, ovn-controller is asserting with the
below stack trace

  ***
   (gdb) bt
 0  raise () from /lib64/libc.so.6
 1  abort () from /lib64/libc.so.6
 2  ovs_abort_valist ("%s: assertion %s failed in %s()") at 
lib/util.c:419
 3  vlog_abort_valist ("%s: assertion %s failed in %s()") at 
lib/vlog.c:1249
 4  vlog_abort ("%s: assertion %s failed in %s()") at lib/vlog.c:1263
 5  ovs_assert_failure (where="controller/ofctrl.c:1216",
function="flood_remove_flows_for_sb_uuid",
condition="ovs_list_is_empty(&f->list_node)") 
at lib/util.c:86
 6  flood_remove_flows_for_sb_uuid (sb_uuid=...134,
  flood_remove_nodes=...ef0) at controller/ofctrl.c:1216
 9  ofctrl_flood_remove_flows (flood_remove_nodes=...ef0) at 
controller/ofctrl.c:1280
 10 lflow_handle_changed_ref (ref_type=REF_TYPE_PORTGROUP,
  ref_name= "5564_pg_14...aac") at controller/lflow.c:522
 11 _flow_output_resource_ref_handler (ref_type=REF_TYPE_PORTGROUP)
  at controller/ovn-controller.c:2220
 12 engine_compute () at lib/inc-proc-eng.c:306
 13 engine_run_node (recompute_allowed=true) at lib/inc-proc-eng.c:352
 14 engine_run (recompute_allowed=true) at lib/inc-proc-eng.c:377
 15 main () at controller/ovn-controller.c:2887
  ***

The reason for this assert is different from the assertion bug fixed in
the commit 858d1dd716("ofctrl: Fix the assert seen when flood removing
flows.")

The above assertion is seen if lflow.c calls ofctrl_add_or_append_flow()
twice for the same flow uuid 'S'.  When this function is called for
the first time, a new desired flow 'f' is created and 'f->references'
will have [(S, f)].  When it is called for the 2nd time, the list
'f->references' is updated as [(S, f), (S,f)].  Later when
flood_remove_flows_for_sb_uuid() is called for 'S', the above assertion
is seen.

This scenarion can happen when ovn-controller receives an update message
from SB ovsdb-server in which a new logical flow 'F' belonging to a
datapath 'D' is added which would result in conjunction flows.  If this
datapath 'D' is added to 'local_datapaths' in the same loop, then
logical flow 'F' is first processed in lflow_add_flows_for_datapath()
and the second time in lflow_handle_changed_flows().

This issue can be fixed in 2 ways
1. Flood remove all the tracked flows in lflow_handle_changed_flows()
 first and then process the modified or newly added logical flows.

2. In the function ofctrl_add_or_append_flow(), check if there is
 already a reference for the flow uuid 'S' in the existing desired
 flows and only append the actions if it doesn't exist.

This patch has taken the approach (2) as that seems better and
effecient.

Signed-off-by: Numan Siddique 
---


Hi Numan,

Nice work tracking this down!

This is not a full review, just an initial question to see if there's
also an option #3 to fix this issue:

Can we try to avoid processing the same lflow twice?  E.g., in
lflow_add_flows_for_datapath(), call consider_logical_flow__() only for
logical flows that are not on the table's 'track_list', so have not
changed in the SB.


Hi Dumitru,

Thanks for the comments. I think this would require running  a for
loop of tracked
flows for every logical flow for the datapath and checking if it is in
the tracked list or not.
I think we could do this too. But in my opinion we should support
ofctrl_add_or_append_flow() to be called twice for the same uuid.
When ofctrl_add_or_append_flow() is called twice, it should result in a no-op.

We could end up in a similar scenario later when incrementally processing
other changes.

In my opinion we can take (2) to not result in this assert scenario.
We can also take your suggested approach (3) so that we don't call
ofctrl_add_or_append_flow() twice for the reported scenario.

What do you think?


One problem I see with #3 is that, this makes the function
lflow_add_flows_for_datapath()
assume that the function lflow_handle_changed_flows() will be called
later and also enforces
the order of the I-P engine nodes registered to the flow_output.  In my opinion
we should avoid this assumption.



That's a fair point indeed.  But if we go for (2) we also increase 
complexity in the already complex ofctrl_*() code.


I'm not sure yet what's best :)

Regards,
Dumitru

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] ofctrl: Don't append actions for conj flows if called twice for same flow uuid.

2021-02-22 Thread Dumitru Ceara

On 2/22/21 10:23 AM, Numan Siddique wrote:

On Mon, Feb 22, 2021 at 2:17 PM Dumitru Ceara  wrote:


On 2/21/21 12:34 PM, num...@ovn.org wrote:

From: Numan Siddique 

In one of the scaled deployments, ovn-controller is asserting with the
below stack trace

  ***
   (gdb) bt
 0  raise () from /lib64/libc.so.6
 1  abort () from /lib64/libc.so.6
 2  ovs_abort_valist ("%s: assertion %s failed in %s()") at 
lib/util.c:419
 3  vlog_abort_valist ("%s: assertion %s failed in %s()") at 
lib/vlog.c:1249
 4  vlog_abort ("%s: assertion %s failed in %s()") at lib/vlog.c:1263
 5  ovs_assert_failure (where="controller/ofctrl.c:1216",
function="flood_remove_flows_for_sb_uuid",
condition="ovs_list_is_empty(&f->list_node)") 
at lib/util.c:86
 6  flood_remove_flows_for_sb_uuid (sb_uuid=...134,
  flood_remove_nodes=...ef0) at controller/ofctrl.c:1216
 9  ofctrl_flood_remove_flows (flood_remove_nodes=...ef0) at 
controller/ofctrl.c:1280
 10 lflow_handle_changed_ref (ref_type=REF_TYPE_PORTGROUP,
  ref_name= "5564_pg_14...aac") at controller/lflow.c:522
 11 _flow_output_resource_ref_handler (ref_type=REF_TYPE_PORTGROUP)
  at controller/ovn-controller.c:2220
 12 engine_compute () at lib/inc-proc-eng.c:306
 13 engine_run_node (recompute_allowed=true) at lib/inc-proc-eng.c:352
 14 engine_run (recompute_allowed=true) at lib/inc-proc-eng.c:377
 15 main () at controller/ovn-controller.c:2887
  ***

The reason for this assert is different from the assertion bug fixed in
the commit 858d1dd716("ofctrl: Fix the assert seen when flood removing
flows.")

The above assertion is seen if lflow.c calls ofctrl_add_or_append_flow()
twice for the same flow uuid 'S'.  When this function is called for
the first time, a new desired flow 'f' is created and 'f->references'
will have [(S, f)].  When it is called for the 2nd time, the list
'f->references' is updated as [(S, f), (S,f)].  Later when
flood_remove_flows_for_sb_uuid() is called for 'S', the above assertion
is seen.

This scenarion can happen when ovn-controller receives an update message
from SB ovsdb-server in which a new logical flow 'F' belonging to a
datapath 'D' is added which would result in conjunction flows.  If this
datapath 'D' is added to 'local_datapaths' in the same loop, then
logical flow 'F' is first processed in lflow_add_flows_for_datapath()
and the second time in lflow_handle_changed_flows().

This issue can be fixed in 2 ways
1. Flood remove all the tracked flows in lflow_handle_changed_flows()
 first and then process the modified or newly added logical flows.

2. In the function ofctrl_add_or_append_flow(), check if there is
 already a reference for the flow uuid 'S' in the existing desired
 flows and only append the actions if it doesn't exist.

This patch has taken the approach (2) as that seems better and
effecient.

Signed-off-by: Numan Siddique 
---


Hi Numan,

Nice work tracking this down!

This is not a full review, just an initial question to see if there's
also an option #3 to fix this issue:

Can we try to avoid processing the same lflow twice?  E.g., in
lflow_add_flows_for_datapath(), call consider_logical_flow__() only for
logical flows that are not on the table's 'track_list', so have not
changed in the SB.


Hi Dumitru,

Thanks for the comments. I think this would require running  a for
loop of tracked
flows for every logical flow for the datapath and checking if it is in
the tracked list or not.


OVS IDL doesn't explicitly expose an API to check if a record has been 
changed (i.e., is on the track list).  We could add one, and use it in 
lflow_add_flows_for_datapath() when iterating over lflows, to just skip 
records that were marked as updates already.



I think we could do this too. But in my opinion we should support
ofctrl_add_or_append_flow() to be called twice for the same uuid.


I'm not sure about this.  Why should we support adding the same flow 
twice?  It seems like a bug to me.



When ofctrl_add_or_append_flow() is called twice, it should result in a no-op.


What if we implement (3) and also add an assert or check in 
link_flow_to_sb() to not allow the same flow to be added twice to a SB uuid?




We could end up in a similar scenario later when incrementally processing
other changes.


As far as I see, we currently call ofctrl_add_or_append_flow() only for 
logical flows so, at least for now, no other incrementally processed SB 
record types can cause this issue.




In my opinion we can take (2) to not result in this assert scenario.
We can also take your suggested approach (3) so that we don't call
ofctrl_add_or_append_flow() twice for the reported scenario.


Right now ofctrl_add_or_append_flow() has the implicit requirement that 
a single flow shouldn't be added twice.  This seems reasonable to me, 
and 

Re: [ovs-dev] [PATCH ovn] ofctrl: Don't append actions for conj flows if called twice for same flow uuid.

2021-02-22 Thread Numan Siddique
On Mon, Feb 22, 2021 at 2:53 PM Numan Siddique  wrote:
>
> On Mon, Feb 22, 2021 at 2:17 PM Dumitru Ceara  wrote:
> >
> > On 2/21/21 12:34 PM, num...@ovn.org wrote:
> > > From: Numan Siddique 
> > >
> > > In one of the scaled deployments, ovn-controller is asserting with the
> > > below stack trace
> > >
> > >  ***
> > >   (gdb) bt
> > > 0  raise () from /lib64/libc.so.6
> > > 1  abort () from /lib64/libc.so.6
> > > 2  ovs_abort_valist ("%s: assertion %s failed in %s()") at 
> > > lib/util.c:419
> > > 3  vlog_abort_valist ("%s: assertion %s failed in %s()") at 
> > > lib/vlog.c:1249
> > > 4  vlog_abort ("%s: assertion %s failed in %s()") at 
> > > lib/vlog.c:1263
> > > 5  ovs_assert_failure (where="controller/ofctrl.c:1216",
> > >function="flood_remove_flows_for_sb_uuid",
> > >
> > > condition="ovs_list_is_empty(&f->list_node)") at lib/util.c:86
> > > 6  flood_remove_flows_for_sb_uuid (sb_uuid=...134,
> > >  flood_remove_nodes=...ef0) at controller/ofctrl.c:1216
> > > 9  ofctrl_flood_remove_flows (flood_remove_nodes=...ef0) at 
> > > controller/ofctrl.c:1280
> > > 10 lflow_handle_changed_ref (ref_type=REF_TYPE_PORTGROUP,
> > >  ref_name= "5564_pg_14...aac") at controller/lflow.c:522
> > > 11 _flow_output_resource_ref_handler (ref_type=REF_TYPE_PORTGROUP)
> > >  at controller/ovn-controller.c:2220
> > > 12 engine_compute () at lib/inc-proc-eng.c:306
> > > 13 engine_run_node (recompute_allowed=true) at 
> > > lib/inc-proc-eng.c:352
> > > 14 engine_run (recompute_allowed=true) at lib/inc-proc-eng.c:377
> > > 15 main () at controller/ovn-controller.c:2887
> > >  ***
> > >
> > > The reason for this assert is different from the assertion bug fixed in
> > > the commit 858d1dd716("ofctrl: Fix the assert seen when flood removing
> > > flows.")
> > >
> > > The above assertion is seen if lflow.c calls ofctrl_add_or_append_flow()
> > > twice for the same flow uuid 'S'.  When this function is called for
> > > the first time, a new desired flow 'f' is created and 'f->references'
> > > will have [(S, f)].  When it is called for the 2nd time, the list
> > > 'f->references' is updated as [(S, f), (S,f)].  Later when
> > > flood_remove_flows_for_sb_uuid() is called for 'S', the above assertion
> > > is seen.
> > >
> > > This scenarion can happen when ovn-controller receives an update message
> > > from SB ovsdb-server in which a new logical flow 'F' belonging to a
> > > datapath 'D' is added which would result in conjunction flows.  If this
> > > datapath 'D' is added to 'local_datapaths' in the same loop, then
> > > logical flow 'F' is first processed in lflow_add_flows_for_datapath()
> > > and the second time in lflow_handle_changed_flows().
> > >
> > > This issue can be fixed in 2 ways
> > > 1. Flood remove all the tracked flows in lflow_handle_changed_flows()
> > > first and then process the modified or newly added logical flows.
> > >
> > > 2. In the function ofctrl_add_or_append_flow(), check if there is
> > > already a reference for the flow uuid 'S' in the existing desired
> > > flows and only append the actions if it doesn't exist.
> > >
> > > This patch has taken the approach (2) as that seems better and
> > > effecient.
> > >
> > > Signed-off-by: Numan Siddique 
> > > ---
> >
> > Hi Numan,
> >
> > Nice work tracking this down!
> >
> > This is not a full review, just an initial question to see if there's
> > also an option #3 to fix this issue:
> >
> > Can we try to avoid processing the same lflow twice?  E.g., in
> > lflow_add_flows_for_datapath(), call consider_logical_flow__() only for
> > logical flows that are not on the table's 'track_list', so have not
> > changed in the SB.
>
> Hi Dumitru,
>
> Thanks for the comments. I think this would require running  a for
> loop of tracked
> flows for every logical flow for the datapath and checking if it is in
> the tracked list or not.
> I think we could do this too. But in my opinion we should support
> ofctrl_add_or_append_flow() to be called twice for the same uuid.
> When ofctrl_add_or_append_flow() is called twice, it should result in a no-op.
>
> We could end up in a similar scenario later when incrementally processing
> other changes.
>
> In my opinion we can take (2) to not result in this assert scenario.
> We can also take your suggested approach (3) so that we don't call
> ofctrl_add_or_append_flow() twice for the reported scenario.
>
> What do you think?

One problem I see with #3 is that, this makes the function
lflow_add_flows_for_datapath()
assume that the function lflow_handle_changed_flows() will be called
later and also enforces
the order of the I-P engine nodes registered to the flow_output.  In my opinion
we should avoid this assumption.

Thanks
Numan

>
> Thanks
> Numan
>
> >
> > Regards,
> > Dumitru
> >
> > 

Re: [ovs-dev] [PATCH ovn] ofctrl: Don't append actions for conj flows if called twice for same flow uuid.

2021-02-22 Thread Numan Siddique
On Mon, Feb 22, 2021 at 2:17 PM Dumitru Ceara  wrote:
>
> On 2/21/21 12:34 PM, num...@ovn.org wrote:
> > From: Numan Siddique 
> >
> > In one of the scaled deployments, ovn-controller is asserting with the
> > below stack trace
> >
> >  ***
> >   (gdb) bt
> > 0  raise () from /lib64/libc.so.6
> > 1  abort () from /lib64/libc.so.6
> > 2  ovs_abort_valist ("%s: assertion %s failed in %s()") at 
> > lib/util.c:419
> > 3  vlog_abort_valist ("%s: assertion %s failed in %s()") at 
> > lib/vlog.c:1249
> > 4  vlog_abort ("%s: assertion %s failed in %s()") at lib/vlog.c:1263
> > 5  ovs_assert_failure (where="controller/ofctrl.c:1216",
> >function="flood_remove_flows_for_sb_uuid",
> >
> > condition="ovs_list_is_empty(&f->list_node)") at lib/util.c:86
> > 6  flood_remove_flows_for_sb_uuid (sb_uuid=...134,
> >  flood_remove_nodes=...ef0) at controller/ofctrl.c:1216
> > 9  ofctrl_flood_remove_flows (flood_remove_nodes=...ef0) at 
> > controller/ofctrl.c:1280
> > 10 lflow_handle_changed_ref (ref_type=REF_TYPE_PORTGROUP,
> >  ref_name= "5564_pg_14...aac") at controller/lflow.c:522
> > 11 _flow_output_resource_ref_handler (ref_type=REF_TYPE_PORTGROUP)
> >  at controller/ovn-controller.c:2220
> > 12 engine_compute () at lib/inc-proc-eng.c:306
> > 13 engine_run_node (recompute_allowed=true) at 
> > lib/inc-proc-eng.c:352
> > 14 engine_run (recompute_allowed=true) at lib/inc-proc-eng.c:377
> > 15 main () at controller/ovn-controller.c:2887
> >  ***
> >
> > The reason for this assert is different from the assertion bug fixed in
> > the commit 858d1dd716("ofctrl: Fix the assert seen when flood removing
> > flows.")
> >
> > The above assertion is seen if lflow.c calls ofctrl_add_or_append_flow()
> > twice for the same flow uuid 'S'.  When this function is called for
> > the first time, a new desired flow 'f' is created and 'f->references'
> > will have [(S, f)].  When it is called for the 2nd time, the list
> > 'f->references' is updated as [(S, f), (S,f)].  Later when
> > flood_remove_flows_for_sb_uuid() is called for 'S', the above assertion
> > is seen.
> >
> > This scenarion can happen when ovn-controller receives an update message
> > from SB ovsdb-server in which a new logical flow 'F' belonging to a
> > datapath 'D' is added which would result in conjunction flows.  If this
> > datapath 'D' is added to 'local_datapaths' in the same loop, then
> > logical flow 'F' is first processed in lflow_add_flows_for_datapath()
> > and the second time in lflow_handle_changed_flows().
> >
> > This issue can be fixed in 2 ways
> > 1. Flood remove all the tracked flows in lflow_handle_changed_flows()
> > first and then process the modified or newly added logical flows.
> >
> > 2. In the function ofctrl_add_or_append_flow(), check if there is
> > already a reference for the flow uuid 'S' in the existing desired
> > flows and only append the actions if it doesn't exist.
> >
> > This patch has taken the approach (2) as that seems better and
> > effecient.
> >
> > Signed-off-by: Numan Siddique 
> > ---
>
> Hi Numan,
>
> Nice work tracking this down!
>
> This is not a full review, just an initial question to see if there's
> also an option #3 to fix this issue:
>
> Can we try to avoid processing the same lflow twice?  E.g., in
> lflow_add_flows_for_datapath(), call consider_logical_flow__() only for
> logical flows that are not on the table's 'track_list', so have not
> changed in the SB.

Hi Dumitru,

Thanks for the comments. I think this would require running  a for
loop of tracked
flows for every logical flow for the datapath and checking if it is in
the tracked list or not.
I think we could do this too. But in my opinion we should support
ofctrl_add_or_append_flow() to be called twice for the same uuid.
When ofctrl_add_or_append_flow() is called twice, it should result in a no-op.

We could end up in a similar scenario later when incrementally processing
other changes.

In my opinion we can take (2) to not result in this assert scenario.
We can also take your suggested approach (3) so that we don't call
ofctrl_add_or_append_flow() twice for the reported scenario.

What do you think?

Thanks
Numan

>
> Regards,
> Dumitru
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Does the OVS command line support on-root users?

2021-02-22 Thread wangyunjian
Hi all:
  I have a question to consult: I see that OVS daemon has
been supported to run as non-root in 2015 with the patch
"lib/daemon: support --user option for all OVS daemon",
I would like to know whether the openvswitch command line
such as ovs-vsctl/ovs-appctl will be called as non-root
in any plan, and is there any consideration for calling
as root at present?

Thanks
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] ofctrl: Don't append actions for conj flows if called twice for same flow uuid.

2021-02-22 Thread Dumitru Ceara

On 2/21/21 12:34 PM, num...@ovn.org wrote:

From: Numan Siddique 

In one of the scaled deployments, ovn-controller is asserting with the
below stack trace

 ***
  (gdb) bt
0  raise () from /lib64/libc.so.6
1  abort () from /lib64/libc.so.6
2  ovs_abort_valist ("%s: assertion %s failed in %s()") at 
lib/util.c:419
3  vlog_abort_valist ("%s: assertion %s failed in %s()") at 
lib/vlog.c:1249
4  vlog_abort ("%s: assertion %s failed in %s()") at lib/vlog.c:1263
5  ovs_assert_failure (where="controller/ofctrl.c:1216",
   function="flood_remove_flows_for_sb_uuid",
   condition="ovs_list_is_empty(&f->list_node)") at 
lib/util.c:86
6  flood_remove_flows_for_sb_uuid (sb_uuid=...134,
 flood_remove_nodes=...ef0) at controller/ofctrl.c:1216
9  ofctrl_flood_remove_flows (flood_remove_nodes=...ef0) at 
controller/ofctrl.c:1280
10 lflow_handle_changed_ref (ref_type=REF_TYPE_PORTGROUP,
 ref_name= "5564_pg_14...aac") at controller/lflow.c:522
11 _flow_output_resource_ref_handler (ref_type=REF_TYPE_PORTGROUP)
 at controller/ovn-controller.c:2220
12 engine_compute () at lib/inc-proc-eng.c:306
13 engine_run_node (recompute_allowed=true) at lib/inc-proc-eng.c:352
14 engine_run (recompute_allowed=true) at lib/inc-proc-eng.c:377
15 main () at controller/ovn-controller.c:2887
 ***

The reason for this assert is different from the assertion bug fixed in
the commit 858d1dd716("ofctrl: Fix the assert seen when flood removing
flows.")

The above assertion is seen if lflow.c calls ofctrl_add_or_append_flow()
twice for the same flow uuid 'S'.  When this function is called for
the first time, a new desired flow 'f' is created and 'f->references'
will have [(S, f)].  When it is called for the 2nd time, the list
'f->references' is updated as [(S, f), (S,f)].  Later when
flood_remove_flows_for_sb_uuid() is called for 'S', the above assertion
is seen.

This scenarion can happen when ovn-controller receives an update message
from SB ovsdb-server in which a new logical flow 'F' belonging to a
datapath 'D' is added which would result in conjunction flows.  If this
datapath 'D' is added to 'local_datapaths' in the same loop, then
logical flow 'F' is first processed in lflow_add_flows_for_datapath()
and the second time in lflow_handle_changed_flows().

This issue can be fixed in 2 ways
1. Flood remove all the tracked flows in lflow_handle_changed_flows()
first and then process the modified or newly added logical flows.

2. In the function ofctrl_add_or_append_flow(), check if there is
already a reference for the flow uuid 'S' in the existing desired
flows and only append the actions if it doesn't exist.

This patch has taken the approach (2) as that seems better and
effecient.

Signed-off-by: Numan Siddique 
---


Hi Numan,

Nice work tracking this down!

This is not a full review, just an initial question to see if there's 
also an option #3 to fix this issue:


Can we try to avoid processing the same lflow twice?  E.g., in 
lflow_add_flows_for_datapath(), call consider_logical_flow__() only for 
logical flows that are not on the table's 'track_list', so have not 
changed in the SB.


Regards,
Dumitru

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] ofctrl: Don't append actions for conj flows if called twice for same flow uuid.

2021-02-22 Thread Numan Siddique
On Sun, Feb 21, 2021 at 5:05 PM  wrote:
>
> From: Numan Siddique 
>
> In one of the scaled deployments, ovn-controller is asserting with the
> below stack trace
>
> ***
>  (gdb) bt
>0  raise () from /lib64/libc.so.6
>1  abort () from /lib64/libc.so.6
>2  ovs_abort_valist ("%s: assertion %s failed in %s()") at 
> lib/util.c:419
>3  vlog_abort_valist ("%s: assertion %s failed in %s()") at 
> lib/vlog.c:1249
>4  vlog_abort ("%s: assertion %s failed in %s()") at lib/vlog.c:1263
>5  ovs_assert_failure (where="controller/ofctrl.c:1216",
>   function="flood_remove_flows_for_sb_uuid",
>   condition="ovs_list_is_empty(&f->list_node)") 
> at lib/util.c:86
>6  flood_remove_flows_for_sb_uuid (sb_uuid=...134,
> flood_remove_nodes=...ef0) at controller/ofctrl.c:1216
>9  ofctrl_flood_remove_flows (flood_remove_nodes=...ef0) at 
> controller/ofctrl.c:1280
>10 lflow_handle_changed_ref (ref_type=REF_TYPE_PORTGROUP,
> ref_name= "5564_pg_14...aac") at controller/lflow.c:522
>11 _flow_output_resource_ref_handler (ref_type=REF_TYPE_PORTGROUP)
> at controller/ovn-controller.c:2220
>12 engine_compute () at lib/inc-proc-eng.c:306
>13 engine_run_node (recompute_allowed=true) at lib/inc-proc-eng.c:352
>14 engine_run (recompute_allowed=true) at lib/inc-proc-eng.c:377
>15 main () at controller/ovn-controller.c:2887
> ***
>
> The reason for this assert is different from the assertion bug fixed in
> the commit 858d1dd716("ofctrl: Fix the assert seen when flood removing
> flows.")
>
> The above assertion is seen if lflow.c calls ofctrl_add_or_append_flow()
> twice for the same flow uuid 'S'.  When this function is called for
> the first time, a new desired flow 'f' is created and 'f->references'
> will have [(S, f)].  When it is called for the 2nd time, the list
> 'f->references' is updated as [(S, f), (S,f)].  Later when
> flood_remove_flows_for_sb_uuid() is called for 'S', the above assertion
> is seen.
>
> This scenarion can happen when ovn-controller receives an update message
> from SB ovsdb-server in which a new logical flow 'F' belonging to a
> datapath 'D' is added which would result in conjunction flows.  If this
> datapath 'D' is added to 'local_datapaths' in the same loop, then
> logical flow 'F' is first processed in lflow_add_flows_for_datapath()
> and the second time in lflow_handle_changed_flows().
>
> This issue can be fixed in 2 ways
> 1. Flood remove all the tracked flows in lflow_handle_changed_flows()
>first and then process the modified or newly added logical flows.
>
> 2. In the function ofctrl_add_or_append_flow(), check if there is
>already a reference for the flow uuid 'S' in the existing desired
>flows and only append the actions if it doesn't exist.
>
> This patch has taken the approach (2) as that seems better and
> effecient.
>
> Signed-off-by: Numan Siddique 
> ---
>  controller/ofctrl.c |  38 +---
>  tests/ovn.at| 104 
>  2 files changed, 135 insertions(+), 7 deletions(-)
>
> diff --git a/controller/ofctrl.c b/controller/ofctrl.c
> index 415d9b7e16..ccfb8b4627 100644
> --- a/controller/ofctrl.c
> +++ b/controller/ofctrl.c
> @@ -231,6 +231,8 @@ static struct desired_flow 
> *desired_flow_lookup_conjunctive(
>  struct ovn_desired_flow_table *,
>  const struct ovn_flow *target);
>  static void desired_flow_destroy(struct desired_flow *);
> +static bool check_sb_uuid_in_flow_references(const struct desired_flow *f,
> + const struct uuid *sb_uuid);
>
>  static struct installed_flow *installed_flow_lookup(
>  const struct ovn_flow *target);
> @@ -1076,9 +1078,24 @@ ofctrl_add_or_append_flow(struct 
> ovn_desired_flow_table *desired_flows,
>  existing = desired_flow_lookup_conjunctive(desired_flows, &f->flow);
>  if (existing) {
>  /* There's already a flow with this particular match and action
> - * 'conjunction'. Append the action to that flow rather than
> - * adding a new flow.
> - */
> + * 'conjunction'.  Check that if the desired flow
> + * 'existing->references' has 'struct sb_flow_ref' entry for 
> 'sb_uuid'.
> + * If so, then log a warning and return.  Its possible that
> + * 'ofctrl_add_or_append_flow()' is called twice for the same
> + * 'sb_uuid'. */
> +if (check_sb_uuid_in_flow_references(existing, sb_uuid)) {
> +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> +char *f_str = ovn_flow_to_string(&f->flow);
> +char *existingf_str = ovn_flow_to_string(&existing->flow);
> +VLOG_WARN_RL(&rl, "The existing flow [%s] already has a 
> reference "
> + "for SB UUID : "UUID_FMT". Ignoring the flow