[ovs-dev] [PATCH] vswitchd: Wait for a bridge exit before replying to exit unixctl.

2023-07-04 Thread Ilya Maximets
Before the cleanup option, the bridge_exit() call was fairly fast,
because it didn't include any particularly long operations.  However,
with the cleanup flag, this function destroys a lot of datapath
resources freeing a lot of memory, waiting on RCU and talking to
the kernel.  That may take a noticeable amount of time, especially
on a busy system or under profilers/sanitizers.  However, the unixctl
'exit' command replies instantly without waiting for any work to
actually be done.  This may cause system test failures or other
issues where scripts expect ovs-vswitchd to exit or destroy all the
datapath resources shortly after appctl call.

Fix that by waiting for the bridge_exit() before replying to the user.
At least, all the datapath resources will actually be destroyed by
the time ovs-appctl exits.

Also moving a structure from stack to global.  Seems cleaner this way.

Fixes: fe13ccdca6a2 ("vswitchd: Add --cleanup option to the 'appctl exit' 
command")
Signed-off-by: Ilya Maximets 
---
 vswitchd/ovs-vswitchd.c | 38 +++---
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
index a244d2f70..55b437871 100644
--- a/vswitchd/ovs-vswitchd.c
+++ b/vswitchd/ovs-vswitchd.c
@@ -68,19 +68,18 @@ static unixctl_cb_func ovs_vswitchd_exit;
 static char *parse_options(int argc, char *argv[], char **unixctl_path);
 OVS_NO_RETURN static void usage(void);
 
-struct ovs_vswitchd_exit_args {
-bool *exiting;
-bool *cleanup;
-};
+static struct ovs_vswitchd_exit_args {
+struct unixctl_conn *conn;
+bool exiting;
+bool cleanup;
+} exit_args;
 
 int
 main(int argc, char *argv[])
 {
-char *unixctl_path = NULL;
 struct unixctl_server *unixctl;
+char *unixctl_path = NULL;
 char *remote;
-bool exiting, cleanup;
-struct ovs_vswitchd_exit_args exit_args = {&exiting, &cleanup};
 int retval;
 
 set_program_name(argv[0]);
@@ -111,14 +110,12 @@ main(int argc, char *argv[])
 exit(EXIT_FAILURE);
 }
 unixctl_command_register("exit", "[--cleanup]", 0, 1,
- ovs_vswitchd_exit, &exit_args);
+ ovs_vswitchd_exit, NULL);
 
 bridge_init(remote);
 free(remote);
 
-exiting = false;
-cleanup = false;
-while (!exiting) {
+while (!exit_args.exiting) {
 OVS_USDT_PROBE(main, run_start);
 memory_run();
 if (memory_should_report()) {
@@ -137,16 +134,20 @@ main(int argc, char *argv[])
 bridge_wait();
 unixctl_server_wait(unixctl);
 netdev_wait();
-if (exiting) {
+if (exit_args.exiting) {
 poll_immediate_wake();
 }
 OVS_USDT_PROBE(main, poll_block);
 poll_block();
 if (should_service_stop()) {
-exiting = true;
+exit_args.exiting = true;
 }
 }
-bridge_exit(cleanup);
+bridge_exit(exit_args.cleanup);
+
+if (exit_args.conn) {
+unixctl_command_reply(exit_args.conn, NULL);
+}
 unixctl_server_destroy(unixctl);
 service_stop();
 vlog_disable_async();
@@ -304,10 +305,9 @@ usage(void)
 
 static void
 ovs_vswitchd_exit(struct unixctl_conn *conn, int argc,
-  const char *argv[], void *exit_args_)
+  const char *argv[], void *args OVS_UNUSED)
 {
-struct ovs_vswitchd_exit_args *exit_args = exit_args_;
-*exit_args->exiting = true;
-*exit_args->cleanup = argc == 2 && !strcmp(argv[1], "--cleanup");
-unixctl_command_reply(conn, NULL);
+exit_args.conn = conn;
+exit_args.exiting = true;
+exit_args.cleanup = argc == 2 && !strcmp(argv[1], "--cleanup");
 }
-- 
2.40.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] vswitchd: Wait for a bridge exit before replying to exit unixctl.

2023-07-05 Thread Eelco Chaudron



On 4 Jul 2023, at 15:11, Ilya Maximets wrote:

> Before the cleanup option, the bridge_exit() call was fairly fast,
> because it didn't include any particularly long operations.  However,
> with the cleanup flag, this function destroys a lot of datapath
> resources freeing a lot of memory, waiting on RCU and talking to
> the kernel.  That may take a noticeable amount of time, especially
> on a busy system or under profilers/sanitizers.  However, the unixctl
> 'exit' command replies instantly without waiting for any work to
> actually be done.  This may cause system test failures or other
> issues where scripts expect ovs-vswitchd to exit or destroy all the
> datapath resources shortly after appctl call.
>
> Fix that by waiting for the bridge_exit() before replying to the user.
> At least, all the datapath resources will actually be destroyed by
> the time ovs-appctl exits.
>
> Also moving a structure from stack to global.  Seems cleaner this way.

Thanks, yes it looks cleaner. One comment inline below.

//Eelco


> Fixes: fe13ccdca6a2 ("vswitchd: Add --cleanup option to the 'appctl exit' 
> command")
> Signed-off-by: Ilya Maximets 
> ---
>  vswitchd/ovs-vswitchd.c | 38 +++---
>  1 file changed, 19 insertions(+), 19 deletions(-)
>
> diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
> index a244d2f70..55b437871 100644
> --- a/vswitchd/ovs-vswitchd.c
> +++ b/vswitchd/ovs-vswitchd.c
> @@ -68,19 +68,18 @@ static unixctl_cb_func ovs_vswitchd_exit;
>  static char *parse_options(int argc, char *argv[], char **unixctl_path);
>  OVS_NO_RETURN static void usage(void);
>
> -struct ovs_vswitchd_exit_args {
> -bool *exiting;
> -bool *cleanup;
> -};
> +static struct ovs_vswitchd_exit_args {
> +struct unixctl_conn *conn;
> +bool exiting;
> +bool cleanup;
> +} exit_args;
>
>  int
>  main(int argc, char *argv[])
>  {
> -char *unixctl_path = NULL;
>  struct unixctl_server *unixctl;
> +char *unixctl_path = NULL;
>  char *remote;
> -bool exiting, cleanup;
> -struct ovs_vswitchd_exit_args exit_args = {&exiting, &cleanup};
>  int retval;
>
>  set_program_name(argv[0]);
> @@ -111,14 +110,12 @@ main(int argc, char *argv[])
>  exit(EXIT_FAILURE);
>  }
>  unixctl_command_register("exit", "[--cleanup]", 0, 1,
> - ovs_vswitchd_exit, &exit_args);
> + ovs_vswitchd_exit, NULL);
>
>  bridge_init(remote);
>  free(remote);
>
> -exiting = false;
> -cleanup = false;
> -while (!exiting) {
> +while (!exit_args.exiting) {
>  OVS_USDT_PROBE(main, run_start);
>  memory_run();
>  if (memory_should_report()) {
> @@ -137,16 +134,20 @@ main(int argc, char *argv[])
>  bridge_wait();
>  unixctl_server_wait(unixctl);
>  netdev_wait();
> -if (exiting) {
> +if (exit_args.exiting) {
>  poll_immediate_wake();
>  }
>  OVS_USDT_PROBE(main, poll_block);
>  poll_block();
>  if (should_service_stop()) {
> -exiting = true;
> +exit_args.exiting = true;
>  }
>  }
> -bridge_exit(cleanup);
> +bridge_exit(exit_args.cleanup);
> +
> +if (exit_args.conn) {
> +unixctl_command_reply(exit_args.conn, NULL);
> +}
>  unixctl_server_destroy(unixctl);
>  service_stop();
>  vlog_disable_async();
> @@ -304,10 +305,9 @@ usage(void)
>
>  static void
>  ovs_vswitchd_exit(struct unixctl_conn *conn, int argc,
> -  const char *argv[], void *exit_args_)
> +  const char *argv[], void *args OVS_UNUSED)
>  {
> -struct ovs_vswitchd_exit_args *exit_args = exit_args_;
> -*exit_args->exiting = true;
> -*exit_args->cleanup = argc == 2 && !strcmp(argv[1], "--cleanup");
> -unixctl_command_reply(conn, NULL);
> +exit_args.conn = conn;

Should we try to protect from two exit command in the same unixctl_server_run()?
Something like:

if (exit_args.conn) {
 unixctl_command_reply(conn, NULL);
} else {
exit_args.conn = conn;
}

> +exit_args.exiting = true;
> +exit_args.cleanup = argc == 2 && !strcmp(argv[1], "--cleanup");
>  }
> -- 
> 2.40.1
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] vswitchd: Wait for a bridge exit before replying to exit unixctl.

2023-07-07 Thread Ilya Maximets
On 7/5/23 15:54, Eelco Chaudron wrote:
> 
> 
> On 4 Jul 2023, at 15:11, Ilya Maximets wrote:
> 
>> Before the cleanup option, the bridge_exit() call was fairly fast,
>> because it didn't include any particularly long operations.  However,
>> with the cleanup flag, this function destroys a lot of datapath
>> resources freeing a lot of memory, waiting on RCU and talking to
>> the kernel.  That may take a noticeable amount of time, especially
>> on a busy system or under profilers/sanitizers.  However, the unixctl
>> 'exit' command replies instantly without waiting for any work to
>> actually be done.  This may cause system test failures or other
>> issues where scripts expect ovs-vswitchd to exit or destroy all the
>> datapath resources shortly after appctl call.
>>
>> Fix that by waiting for the bridge_exit() before replying to the user.
>> At least, all the datapath resources will actually be destroyed by
>> the time ovs-appctl exits.
>>
>> Also moving a structure from stack to global.  Seems cleaner this way.
> 
> Thanks, yes it looks cleaner. One comment inline below.
> 
> //Eelco
> 
> 
>> Fixes: fe13ccdca6a2 ("vswitchd: Add --cleanup option to the 'appctl exit' 
>> command")
>> Signed-off-by: Ilya Maximets 
>> ---
>>  vswitchd/ovs-vswitchd.c | 38 +++---
>>  1 file changed, 19 insertions(+), 19 deletions(-)
>>
>> diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
>> index a244d2f70..55b437871 100644
>> --- a/vswitchd/ovs-vswitchd.c
>> +++ b/vswitchd/ovs-vswitchd.c
>> @@ -68,19 +68,18 @@ static unixctl_cb_func ovs_vswitchd_exit;
>>  static char *parse_options(int argc, char *argv[], char **unixctl_path);
>>  OVS_NO_RETURN static void usage(void);
>>
>> -struct ovs_vswitchd_exit_args {
>> -bool *exiting;
>> -bool *cleanup;
>> -};
>> +static struct ovs_vswitchd_exit_args {
>> +struct unixctl_conn *conn;
>> +bool exiting;
>> +bool cleanup;
>> +} exit_args;
>>
>>  int
>>  main(int argc, char *argv[])
>>  {
>> -char *unixctl_path = NULL;
>>  struct unixctl_server *unixctl;
>> +char *unixctl_path = NULL;
>>  char *remote;
>> -bool exiting, cleanup;
>> -struct ovs_vswitchd_exit_args exit_args = {&exiting, &cleanup};
>>  int retval;
>>
>>  set_program_name(argv[0]);
>> @@ -111,14 +110,12 @@ main(int argc, char *argv[])
>>  exit(EXIT_FAILURE);
>>  }
>>  unixctl_command_register("exit", "[--cleanup]", 0, 1,
>> - ovs_vswitchd_exit, &exit_args);
>> + ovs_vswitchd_exit, NULL);
>>
>>  bridge_init(remote);
>>  free(remote);
>>
>> -exiting = false;
>> -cleanup = false;
>> -while (!exiting) {
>> +while (!exit_args.exiting) {
>>  OVS_USDT_PROBE(main, run_start);
>>  memory_run();
>>  if (memory_should_report()) {
>> @@ -137,16 +134,20 @@ main(int argc, char *argv[])
>>  bridge_wait();
>>  unixctl_server_wait(unixctl);
>>  netdev_wait();
>> -if (exiting) {
>> +if (exit_args.exiting) {
>>  poll_immediate_wake();
>>  }
>>  OVS_USDT_PROBE(main, poll_block);
>>  poll_block();
>>  if (should_service_stop()) {
>> -exiting = true;
>> +exit_args.exiting = true;
>>  }
>>  }
>> -bridge_exit(cleanup);
>> +bridge_exit(exit_args.cleanup);
>> +
>> +if (exit_args.conn) {
>> +unixctl_command_reply(exit_args.conn, NULL);
>> +}
>>  unixctl_server_destroy(unixctl);
>>  service_stop();
>>  vlog_disable_async();
>> @@ -304,10 +305,9 @@ usage(void)
>>
>>  static void
>>  ovs_vswitchd_exit(struct unixctl_conn *conn, int argc,
>> -  const char *argv[], void *exit_args_)
>> +  const char *argv[], void *args OVS_UNUSED)
>>  {
>> -struct ovs_vswitchd_exit_args *exit_args = exit_args_;
>> -*exit_args->exiting = true;
>> -*exit_args->cleanup = argc == 2 && !strcmp(argv[1], "--cleanup");
>> -unixctl_command_reply(conn, NULL);
>> +exit_args.conn = conn;
> 
> Should we try to protect from two exit command in the same 
> unixctl_server_run()?
> Something like:
> 
> if (exit_args.conn) {
>  unixctl_command_reply(conn, NULL);
> } else {
> exit_args.conn = conn;
> }
> 

Good point.  It's unlikely, but can happen.
We could either do what you suggested or store an array of connections
and try to reply to all of them.  It becomes a bit inconsistent though
if different calls have different options.

What do you think?

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] vswitchd: Wait for a bridge exit before replying to exit unixctl.

2023-07-07 Thread Eelco Chaudron



On 7 Jul 2023, at 12:58, Ilya Maximets wrote:

> On 7/5/23 15:54, Eelco Chaudron wrote:
>>
>>
>> On 4 Jul 2023, at 15:11, Ilya Maximets wrote:
>>
>>> Before the cleanup option, the bridge_exit() call was fairly fast,
>>> because it didn't include any particularly long operations.  However,
>>> with the cleanup flag, this function destroys a lot of datapath
>>> resources freeing a lot of memory, waiting on RCU and talking to
>>> the kernel.  That may take a noticeable amount of time, especially
>>> on a busy system or under profilers/sanitizers.  However, the unixctl
>>> 'exit' command replies instantly without waiting for any work to
>>> actually be done.  This may cause system test failures or other
>>> issues where scripts expect ovs-vswitchd to exit or destroy all the
>>> datapath resources shortly after appctl call.
>>>
>>> Fix that by waiting for the bridge_exit() before replying to the user.
>>> At least, all the datapath resources will actually be destroyed by
>>> the time ovs-appctl exits.
>>>
>>> Also moving a structure from stack to global.  Seems cleaner this way.
>>
>> Thanks, yes it looks cleaner. One comment inline below.
>>
>> //Eelco
>>
>>
>>> Fixes: fe13ccdca6a2 ("vswitchd: Add --cleanup option to the 'appctl exit' 
>>> command")
>>> Signed-off-by: Ilya Maximets 
>>> ---
>>>  vswitchd/ovs-vswitchd.c | 38 +++---
>>>  1 file changed, 19 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
>>> index a244d2f70..55b437871 100644
>>> --- a/vswitchd/ovs-vswitchd.c
>>> +++ b/vswitchd/ovs-vswitchd.c
>>> @@ -68,19 +68,18 @@ static unixctl_cb_func ovs_vswitchd_exit;
>>>  static char *parse_options(int argc, char *argv[], char **unixctl_path);
>>>  OVS_NO_RETURN static void usage(void);
>>>
>>> -struct ovs_vswitchd_exit_args {
>>> -bool *exiting;
>>> -bool *cleanup;
>>> -};
>>> +static struct ovs_vswitchd_exit_args {
>>> +struct unixctl_conn *conn;
>>> +bool exiting;
>>> +bool cleanup;
>>> +} exit_args;
>>>
>>>  int
>>>  main(int argc, char *argv[])
>>>  {
>>> -char *unixctl_path = NULL;
>>>  struct unixctl_server *unixctl;
>>> +char *unixctl_path = NULL;
>>>  char *remote;
>>> -bool exiting, cleanup;
>>> -struct ovs_vswitchd_exit_args exit_args = {&exiting, &cleanup};
>>>  int retval;
>>>
>>>  set_program_name(argv[0]);
>>> @@ -111,14 +110,12 @@ main(int argc, char *argv[])
>>>  exit(EXIT_FAILURE);
>>>  }
>>>  unixctl_command_register("exit", "[--cleanup]", 0, 1,
>>> - ovs_vswitchd_exit, &exit_args);
>>> + ovs_vswitchd_exit, NULL);
>>>
>>>  bridge_init(remote);
>>>  free(remote);
>>>
>>> -exiting = false;
>>> -cleanup = false;
>>> -while (!exiting) {
>>> +while (!exit_args.exiting) {
>>>  OVS_USDT_PROBE(main, run_start);
>>>  memory_run();
>>>  if (memory_should_report()) {
>>> @@ -137,16 +134,20 @@ main(int argc, char *argv[])
>>>  bridge_wait();
>>>  unixctl_server_wait(unixctl);
>>>  netdev_wait();
>>> -if (exiting) {
>>> +if (exit_args.exiting) {
>>>  poll_immediate_wake();
>>>  }
>>>  OVS_USDT_PROBE(main, poll_block);
>>>  poll_block();
>>>  if (should_service_stop()) {
>>> -exiting = true;
>>> +exit_args.exiting = true;
>>>  }
>>>  }
>>> -bridge_exit(cleanup);
>>> +bridge_exit(exit_args.cleanup);
>>> +
>>> +if (exit_args.conn) {
>>> +unixctl_command_reply(exit_args.conn, NULL);
>>> +}
>>>  unixctl_server_destroy(unixctl);
>>>  service_stop();
>>>  vlog_disable_async();
>>> @@ -304,10 +305,9 @@ usage(void)
>>>
>>>  static void
>>>  ovs_vswitchd_exit(struct unixctl_conn *conn, int argc,
>>> -  const char *argv[], void *exit_args_)
>>> +  const char *argv[], void *args OVS_UNUSED)
>>>  {
>>> -struct ovs_vswitchd_exit_args *exit_args = exit_args_;
>>> -*exit_args->exiting = true;
>>> -*exit_args->cleanup = argc == 2 && !strcmp(argv[1], "--cleanup");
>>> -unixctl_command_reply(conn, NULL);
>>> +exit_args.conn = conn;
>>
>> Should we try to protect from two exit command in the same 
>> unixctl_server_run()?
>> Something like:
>>
>> if (exit_args.conn) {
>>  unixctl_command_reply(conn, NULL);
>> } else {
>> exit_args.conn = conn;
>> }
>>
>
> Good point.  It's unlikely, but can happen.
> We could either do what you suggested or store an array of connections
> and try to reply to all of them.  It becomes a bit inconsistent though
> if different calls have different options.
>
> What do you think?

I was thinking of an array also but what happens with the one not fitting in 
the array!?

However giving it another thought, it might just break with a pipe error and 
quit. So maybe we should just reply to the first one and l

Re: [ovs-dev] [PATCH] vswitchd: Wait for a bridge exit before replying to exit unixctl.

2023-07-07 Thread Ilya Maximets
On 7/7/23 14:12, Eelco Chaudron wrote:
> 
> 
> On 7 Jul 2023, at 12:58, Ilya Maximets wrote:
> 
>> On 7/5/23 15:54, Eelco Chaudron wrote:
>>>
>>>
>>> On 4 Jul 2023, at 15:11, Ilya Maximets wrote:
>>>
 Before the cleanup option, the bridge_exit() call was fairly fast,
 because it didn't include any particularly long operations.  However,
 with the cleanup flag, this function destroys a lot of datapath
 resources freeing a lot of memory, waiting on RCU and talking to
 the kernel.  That may take a noticeable amount of time, especially
 on a busy system or under profilers/sanitizers.  However, the unixctl
 'exit' command replies instantly without waiting for any work to
 actually be done.  This may cause system test failures or other
 issues where scripts expect ovs-vswitchd to exit or destroy all the
 datapath resources shortly after appctl call.

 Fix that by waiting for the bridge_exit() before replying to the user.
 At least, all the datapath resources will actually be destroyed by
 the time ovs-appctl exits.

 Also moving a structure from stack to global.  Seems cleaner this way.
>>>
>>> Thanks, yes it looks cleaner. One comment inline below.
>>>
>>> //Eelco
>>>
>>>
 Fixes: fe13ccdca6a2 ("vswitchd: Add --cleanup option to the 'appctl exit' 
 command")
 Signed-off-by: Ilya Maximets 
 ---
  vswitchd/ovs-vswitchd.c | 38 +++---
  1 file changed, 19 insertions(+), 19 deletions(-)

 diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
 index a244d2f70..55b437871 100644
 --- a/vswitchd/ovs-vswitchd.c
 +++ b/vswitchd/ovs-vswitchd.c
 @@ -68,19 +68,18 @@ static unixctl_cb_func ovs_vswitchd_exit;
  static char *parse_options(int argc, char *argv[], char **unixctl_path);
  OVS_NO_RETURN static void usage(void);

 -struct ovs_vswitchd_exit_args {
 -bool *exiting;
 -bool *cleanup;
 -};
 +static struct ovs_vswitchd_exit_args {
 +struct unixctl_conn *conn;
 +bool exiting;
 +bool cleanup;
 +} exit_args;

  int
  main(int argc, char *argv[])
  {
 -char *unixctl_path = NULL;
  struct unixctl_server *unixctl;
 +char *unixctl_path = NULL;
  char *remote;
 -bool exiting, cleanup;
 -struct ovs_vswitchd_exit_args exit_args = {&exiting, &cleanup};
  int retval;

  set_program_name(argv[0]);
 @@ -111,14 +110,12 @@ main(int argc, char *argv[])
  exit(EXIT_FAILURE);
  }
  unixctl_command_register("exit", "[--cleanup]", 0, 1,
 - ovs_vswitchd_exit, &exit_args);
 + ovs_vswitchd_exit, NULL);

  bridge_init(remote);
  free(remote);

 -exiting = false;
 -cleanup = false;
 -while (!exiting) {
 +while (!exit_args.exiting) {
  OVS_USDT_PROBE(main, run_start);
  memory_run();
  if (memory_should_report()) {
 @@ -137,16 +134,20 @@ main(int argc, char *argv[])
  bridge_wait();
  unixctl_server_wait(unixctl);
  netdev_wait();
 -if (exiting) {
 +if (exit_args.exiting) {
  poll_immediate_wake();
  }
  OVS_USDT_PROBE(main, poll_block);
  poll_block();
  if (should_service_stop()) {
 -exiting = true;
 +exit_args.exiting = true;
  }
  }
 -bridge_exit(cleanup);
 +bridge_exit(exit_args.cleanup);
 +
 +if (exit_args.conn) {
 +unixctl_command_reply(exit_args.conn, NULL);
 +}
  unixctl_server_destroy(unixctl);
  service_stop();
  vlog_disable_async();
 @@ -304,10 +305,9 @@ usage(void)

  static void
  ovs_vswitchd_exit(struct unixctl_conn *conn, int argc,
 -  const char *argv[], void *exit_args_)
 +  const char *argv[], void *args OVS_UNUSED)
  {
 -struct ovs_vswitchd_exit_args *exit_args = exit_args_;
 -*exit_args->exiting = true;
 -*exit_args->cleanup = argc == 2 && !strcmp(argv[1], "--cleanup");
 -unixctl_command_reply(conn, NULL);
 +exit_args.conn = conn;
>>>
>>> Should we try to protect from two exit command in the same 
>>> unixctl_server_run()?
>>> Something like:
>>>
>>> if (exit_args.conn) {
>>>  unixctl_command_reply(conn, NULL);
>>> } else {
>>> exit_args.conn = conn;
>>> }
>>>
>>
>> Good point.  It's unlikely, but can happen.
>> We could either do what you suggested or store an array of connections
>> and try to reply to all of them.  It becomes a bit inconsistent though
>> if different calls have different options.
>>
>> What do you think?
> 
> I was thinking of an array also but what h

Re: [ovs-dev] [PATCH] vswitchd: Wait for a bridge exit before replying to exit unixctl.

2023-07-07 Thread Eelco Chaudron



On 7 Jul 2023, at 14:19, Ilya Maximets wrote:

> On 7/7/23 14:12, Eelco Chaudron wrote:
>>
>>
>> On 7 Jul 2023, at 12:58, Ilya Maximets wrote:
>>
>>> On 7/5/23 15:54, Eelco Chaudron wrote:


 On 4 Jul 2023, at 15:11, Ilya Maximets wrote:

> Before the cleanup option, the bridge_exit() call was fairly fast,
> because it didn't include any particularly long operations.  However,
> with the cleanup flag, this function destroys a lot of datapath
> resources freeing a lot of memory, waiting on RCU and talking to
> the kernel.  That may take a noticeable amount of time, especially
> on a busy system or under profilers/sanitizers.  However, the unixctl
> 'exit' command replies instantly without waiting for any work to
> actually be done.  This may cause system test failures or other
> issues where scripts expect ovs-vswitchd to exit or destroy all the
> datapath resources shortly after appctl call.
>
> Fix that by waiting for the bridge_exit() before replying to the user.
> At least, all the datapath resources will actually be destroyed by
> the time ovs-appctl exits.
>
> Also moving a structure from stack to global.  Seems cleaner this way.

 Thanks, yes it looks cleaner. One comment inline below.

 //Eelco


> Fixes: fe13ccdca6a2 ("vswitchd: Add --cleanup option to the 'appctl exit' 
> command")
> Signed-off-by: Ilya Maximets 
> ---
>  vswitchd/ovs-vswitchd.c | 38 +++---
>  1 file changed, 19 insertions(+), 19 deletions(-)
>
> diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
> index a244d2f70..55b437871 100644
> --- a/vswitchd/ovs-vswitchd.c
> +++ b/vswitchd/ovs-vswitchd.c
> @@ -68,19 +68,18 @@ static unixctl_cb_func ovs_vswitchd_exit;
>  static char *parse_options(int argc, char *argv[], char **unixctl_path);
>  OVS_NO_RETURN static void usage(void);
>
> -struct ovs_vswitchd_exit_args {
> -bool *exiting;
> -bool *cleanup;
> -};
> +static struct ovs_vswitchd_exit_args {
> +struct unixctl_conn *conn;
> +bool exiting;
> +bool cleanup;
> +} exit_args;
>
>  int
>  main(int argc, char *argv[])
>  {
> -char *unixctl_path = NULL;
>  struct unixctl_server *unixctl;
> +char *unixctl_path = NULL;
>  char *remote;
> -bool exiting, cleanup;
> -struct ovs_vswitchd_exit_args exit_args = {&exiting, &cleanup};
>  int retval;
>
>  set_program_name(argv[0]);
> @@ -111,14 +110,12 @@ main(int argc, char *argv[])
>  exit(EXIT_FAILURE);
>  }
>  unixctl_command_register("exit", "[--cleanup]", 0, 1,
> - ovs_vswitchd_exit, &exit_args);
> + ovs_vswitchd_exit, NULL);
>
>  bridge_init(remote);
>  free(remote);
>
> -exiting = false;
> -cleanup = false;
> -while (!exiting) {
> +while (!exit_args.exiting) {
>  OVS_USDT_PROBE(main, run_start);
>  memory_run();
>  if (memory_should_report()) {
> @@ -137,16 +134,20 @@ main(int argc, char *argv[])
>  bridge_wait();
>  unixctl_server_wait(unixctl);
>  netdev_wait();
> -if (exiting) {
> +if (exit_args.exiting) {
>  poll_immediate_wake();
>  }
>  OVS_USDT_PROBE(main, poll_block);
>  poll_block();
>  if (should_service_stop()) {
> -exiting = true;
> +exit_args.exiting = true;
>  }
>  }
> -bridge_exit(cleanup);
> +bridge_exit(exit_args.cleanup);
> +
> +if (exit_args.conn) {
> +unixctl_command_reply(exit_args.conn, NULL);
> +}
>  unixctl_server_destroy(unixctl);
>  service_stop();
>  vlog_disable_async();
> @@ -304,10 +305,9 @@ usage(void)
>
>  static void
>  ovs_vswitchd_exit(struct unixctl_conn *conn, int argc,
> -  const char *argv[], void *exit_args_)
> +  const char *argv[], void *args OVS_UNUSED)
>  {
> -struct ovs_vswitchd_exit_args *exit_args = exit_args_;
> -*exit_args->exiting = true;
> -*exit_args->cleanup = argc == 2 && !strcmp(argv[1], "--cleanup");
> -unixctl_command_reply(conn, NULL);
> +exit_args.conn = conn;

 Should we try to protect from two exit command in the same 
 unixctl_server_run()?
 Something like:

 if (exit_args.conn) {
  unixctl_command_reply(conn, NULL);
 } else {
 exit_args.conn = conn;
 }

>>>
>>> Good point.  It's unlikely, but can happen.
>>> We could either do what you suggested or store an array of connections
>>> and