[PATCH 1/5] vhost-user-blk: Don't reconnect during initialisation

2021-04-22 Thread Kevin Wolf
This is a partial revert of commits 77542d43149 and bc79c87bcde.

Usually, an error during initialisation means that the configuration was
wrong. Reconnecting won't make the error go away, but just turn the
error condition into an endless loop. Avoid this and return errors
again.

Additionally, calling vhost_user_blk_disconnect() from the chardev event
handler could result in use-after-free because none of the
initialisation code expects that the device could just go away in the
middle. So removing the call fixes crashes in several places.

For example, using a num-queues setting that is incompatible with the
backend would result in a crash like this (dereferencing dev->opaque,
which is already NULL):

 #0  0x55d0a4bd in vhost_user_read_cb (source=0x568f4690, 
condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffcbf0) at 
../hw/virtio/vhost-user.c:313
 #1  0x55d950d3 in qio_channel_fd_source_dispatch 
(source=0x57c3f750, callback=0x55d0a478 , 
user_data=0x7fffcbf0) at ../io/channel-watch.c:84
 #2  0x77b32a9f in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
 #3  0x77b84a98 in g_main_context_iterate.constprop () at 
/lib64/libglib-2.0.so.0
 #4  0x77b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
 #5  0x55d0a724 in vhost_user_read (dev=0x57bc62f8, 
msg=0x7fffcc50) at ../hw/virtio/vhost-user.c:402
 #6  0x55d0ee6b in vhost_user_get_config (dev=0x57bc62f8, 
config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
 #7  0x55d56d46 in vhost_dev_get_config (hdev=0x57bc62f8, 
config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
 #8  0x55cdd150 in vhost_user_blk_device_realize (dev=0x57bc60b0, 
errp=0x7fffcf90) at ../hw/block/vhost-user-blk.c:510
 #9  0x55d08f6d in virtio_device_realize (dev=0x57bc60b0, 
errp=0x7fffcff0) at ../hw/virtio/virtio.c:3660

Signed-off-by: Kevin Wolf 
---
 hw/block/vhost-user-blk.c | 54 ++-
 1 file changed, 13 insertions(+), 41 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index f5e9682703..e824b0a759 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -50,6 +50,8 @@ static const int user_feature_bits[] = {
 VHOST_INVALID_FEATURE_BIT
 };
 
+static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
+
 static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
 {
 VHostUserBlk *s = VHOST_USER_BLK(vdev);
@@ -362,19 +364,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
 vhost_dev_cleanup(&s->dev);
 }
 
-static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
- bool realized);
-
-static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event)
-{
-vhost_user_blk_event(opaque, event, false);
-}
-
-static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event)
-{
-vhost_user_blk_event(opaque, event, true);
-}
-
 static void vhost_user_blk_chr_closed_bh(void *opaque)
 {
 DeviceState *dev = opaque;
@@ -382,12 +371,11 @@ static void vhost_user_blk_chr_closed_bh(void *opaque)
 VHostUserBlk *s = VHOST_USER_BLK(vdev);
 
 vhost_user_blk_disconnect(dev);
-qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL,
-vhost_user_blk_event_oper, NULL, opaque, NULL, true);
+qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
+ NULL, opaque, NULL, true);
 }
 
-static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
- bool realized)
+static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
 {
 DeviceState *dev = opaque;
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -401,17 +389,7 @@ static void vhost_user_blk_event(void *opaque, 
QEMUChrEvent event,
 }
 break;
 case CHR_EVENT_CLOSED:
-/*
- * Closing the connection should happen differently on device
- * initialization and operation stages.
- * On initalization, we want to re-start vhost_dev initialization
- * from the very beginning right away when the connection is closed,
- * so we clean up vhost_dev on each connection closing.
- * On operation, we want to postpone vhost_dev cleanup to let the
- * other code perform its own cleanup sequence using vhost_dev data
- * (e.g. vhost_dev_set_log).
- */
-if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) {
+if (!runstate_check(RUN_STATE_SHUTDOWN)) {
 /*
  * A close event may happen during a read/write, but vhost
  * code assumes the vhost_dev remains setup, so delay the
@@ -431,8 +409,6 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent 
event,
  * knowing its type (in this case vhost-user).
  */
 s->dev.start

Re: [PATCH 1/5] vhost-user-blk: Don't reconnect during initialisation

2021-04-23 Thread Denis Plotnikov

Reviewed-by: Denis Plotnikov 

On 22.04.2021 20:02, Kevin Wolf wrote:

This is a partial revert of commits 77542d43149 and bc79c87bcde.

Usually, an error during initialisation means that the configuration was
wrong. Reconnecting won't make the error go away, but just turn the
error condition into an endless loop. Avoid this and return errors
again.

Additionally, calling vhost_user_blk_disconnect() from the chardev event
handler could result in use-after-free because none of the
initialisation code expects that the device could just go away in the
middle. So removing the call fixes crashes in several places.

For example, using a num-queues setting that is incompatible with the
backend would result in a crash like this (dereferencing dev->opaque,
which is already NULL):

  #0  0x55d0a4bd in vhost_user_read_cb (source=0x568f4690, 
condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffcbf0) at 
../hw/virtio/vhost-user.c:313
  #1  0x55d950d3 in qio_channel_fd_source_dispatch (source=0x57c3f750, 
callback=0x55d0a478 , user_data=0x7fffcbf0) at 
../io/channel-watch.c:84
  #2  0x77b32a9f in g_main_context_dispatch () at 
/lib64/libglib-2.0.so.0
  #3  0x77b84a98 in g_main_context_iterate.constprop () at 
/lib64/libglib-2.0.so.0
  #4  0x77b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
  #5  0x55d0a724 in vhost_user_read (dev=0x57bc62f8, 
msg=0x7fffcc50) at ../hw/virtio/vhost-user.c:402
  #6  0x55d0ee6b in vhost_user_get_config (dev=0x57bc62f8, 
config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
  #7  0x55d56d46 in vhost_dev_get_config (hdev=0x57bc62f8, 
config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
  #8  0x55cdd150 in vhost_user_blk_device_realize (dev=0x57bc60b0, 
errp=0x7fffcf90) at ../hw/block/vhost-user-blk.c:510
  #9  0x55d08f6d in virtio_device_realize (dev=0x57bc60b0, 
errp=0x7fffcff0) at ../hw/virtio/virtio.c:3660

Signed-off-by: Kevin Wolf 
---
  hw/block/vhost-user-blk.c | 54 ++-
  1 file changed, 13 insertions(+), 41 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index f5e9682703..e824b0a759 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -50,6 +50,8 @@ static const int user_feature_bits[] = {
  VHOST_INVALID_FEATURE_BIT
  };
  
+static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);

+
  static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
  {
  VHostUserBlk *s = VHOST_USER_BLK(vdev);
@@ -362,19 +364,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
  vhost_dev_cleanup(&s->dev);
  }
  
-static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,

- bool realized);
-
-static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event)
-{
-vhost_user_blk_event(opaque, event, false);
-}
-
-static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event)
-{
-vhost_user_blk_event(opaque, event, true);
-}
-
  static void vhost_user_blk_chr_closed_bh(void *opaque)
  {
  DeviceState *dev = opaque;
@@ -382,12 +371,11 @@ static void vhost_user_blk_chr_closed_bh(void *opaque)
  VHostUserBlk *s = VHOST_USER_BLK(vdev);
  
  vhost_user_blk_disconnect(dev);

-qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL,
-vhost_user_blk_event_oper, NULL, opaque, NULL, true);
+qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
+ NULL, opaque, NULL, true);
  }
  
-static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,

- bool realized)
+static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
  {
  DeviceState *dev = opaque;
  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -401,17 +389,7 @@ static void vhost_user_blk_event(void *opaque, 
QEMUChrEvent event,
  }
  break;
  case CHR_EVENT_CLOSED:
-/*
- * Closing the connection should happen differently on device
- * initialization and operation stages.
- * On initalization, we want to re-start vhost_dev initialization
- * from the very beginning right away when the connection is closed,
- * so we clean up vhost_dev on each connection closing.
- * On operation, we want to postpone vhost_dev cleanup to let the
- * other code perform its own cleanup sequence using vhost_dev data
- * (e.g. vhost_dev_set_log).
- */
-if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) {
+if (!runstate_check(RUN_STATE_SHUTDOWN)) {
  /*
   * A close event may happen during a read/write, but vhost
   * code assumes the vhost_dev remains setup, so delay the
@@ -431,8 +409,6 @@ static void vhost_user_blk_event(void *opaque, QEMUChrE

Re: [PATCH 1/5] vhost-user-blk: Don't reconnect during initialisation

2021-04-28 Thread Raphael Norwitz
Given what you've shown with the use-after-free, I agree the changes
need to be reverted. I'm a little uneasy about removing the reconnect
logic from the device realization completely though.

On Thu, Apr 22, 2021 at 07:02:17PM +0200, Kevin Wolf wrote:
> This is a partial revert of commits 77542d43149 and bc79c87bcde.
> 
> Usually, an error during initialisation means that the configuration was
> wrong. Reconnecting won't make the error go away, but just turn the
> error condition into an endless loop. Avoid this and return errors
> again.
> 

Is that nessesarily true? As I understand it the main usecases for
device reconnect are to allow a device backend to be restarted after a
failure or to allow the backend to be upgraded without restarting the
guest. I agree - misconfiguration could be a common cause of a device
backend crashing at realize time, but couldn't there be others? Maybe
transient memory pressure?

Especially in the case where one process is connecting to many different
vhost-user-blk instances, I could imagine power-ons and incoming
migrations racing with backend restarts quite frequently. Should
these cases cause failures?

We can still hit the infinite looping case you describe post-realize.
Why should we treat pre-realize differently?

> Additionally, calling vhost_user_blk_disconnect() from the chardev event
> handler could result in use-after-free because none of the
> initialisation code expects that the device could just go away in the
> middle. So removing the call fixes crashes in several places.
> 
> For example, using a num-queues setting that is incompatible with the
> backend would result in a crash like this (dereferencing dev->opaque,
> which is already NULL):
> 
>  #0  0x55d0a4bd in vhost_user_read_cb (source=0x568f4690, 
> condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffcbf0) at 
> ../hw/virtio/vhost-user.c:313
>  #1  0x55d950d3 in qio_channel_fd_source_dispatch 
> (source=0x57c3f750, callback=0x55d0a478 , 
> user_data=0x7fffcbf0) at ../io/channel-watch.c:84
>  #2  0x77b32a9f in g_main_context_dispatch () at 
> /lib64/libglib-2.0.so.0
>  #3  0x77b84a98 in g_main_context_iterate.constprop () at 
> /lib64/libglib-2.0.so.0
>  #4  0x77b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
>  #5  0x55d0a724 in vhost_user_read (dev=0x57bc62f8, 
> msg=0x7fffcc50) at ../hw/virtio/vhost-user.c:402
>  #6  0x55d0ee6b in vhost_user_get_config (dev=0x57bc62f8, 
> config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
>  #7  0x55d56d46 in vhost_dev_get_config (hdev=0x57bc62f8, 
> config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
>  #8  0x55cdd150 in vhost_user_blk_device_realize (dev=0x57bc60b0, 
> errp=0x7fffcf90) at ../hw/block/vhost-user-blk.c:510
>  #9  0x55d08f6d in virtio_device_realize (dev=0x57bc60b0, 
> errp=0x7fffcff0) at ../hw/virtio/virtio.c:3660
> 
> Signed-off-by: Kevin Wolf 
> ---
>  hw/block/vhost-user-blk.c | 54 ++-
>  1 file changed, 13 insertions(+), 41 deletions(-)
> 
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index f5e9682703..e824b0a759 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -50,6 +50,8 @@ static const int user_feature_bits[] = {
>  VHOST_INVALID_FEATURE_BIT
>  };
>  
> +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
> +
>  static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
>  {
>  VHostUserBlk *s = VHOST_USER_BLK(vdev);
> @@ -362,19 +364,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
>  vhost_dev_cleanup(&s->dev);
>  }
>  
> -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
> - bool realized);
> -
> -static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event)
> -{
> -vhost_user_blk_event(opaque, event, false);
> -}
> -
> -static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event)
> -{
> -vhost_user_blk_event(opaque, event, true);
> -}
> -
>  static void vhost_user_blk_chr_closed_bh(void *opaque)
>  {
>  DeviceState *dev = opaque;
> @@ -382,12 +371,11 @@ static void vhost_user_blk_chr_closed_bh(void *opaque)
>  VHostUserBlk *s = VHOST_USER_BLK(vdev);
>  
>  vhost_user_blk_disconnect(dev);
> -qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL,
> -vhost_user_blk_event_oper, NULL, opaque, NULL, true);
> +qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
> + NULL, opaque, NULL, true);
>  }
>  
> -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
> - bool realized)
> +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
>  {
>  DeviceState *dev = opaque;
>  VirtIODevice *vdev = VIRTIO_DEVICE(dev);

Re: [PATCH 1/5] vhost-user-blk: Don't reconnect during initialisation

2021-04-28 Thread Kevin Wolf
Am 28.04.2021 um 18:52 hat Raphael Norwitz geschrieben:
> Given what you've shown with the use-after-free, I agree the changes
> need to be reverted. I'm a little uneasy about removing the reconnect
> logic from the device realization completely though.
> 
> On Thu, Apr 22, 2021 at 07:02:17PM +0200, Kevin Wolf wrote:
> > This is a partial revert of commits 77542d43149 and bc79c87bcde.
> > 
> > Usually, an error during initialisation means that the configuration was
> > wrong. Reconnecting won't make the error go away, but just turn the
> > error condition into an endless loop. Avoid this and return errors
> > again.
> > 
> 
> Is that nessesarily true? As I understand it the main usecases for
> device reconnect are to allow a device backend to be restarted after a
> failure or to allow the backend to be upgraded without restarting the
> guest. I agree - misconfiguration could be a common cause of a device
> backend crashing at realize time, but couldn't there be others? Maybe
> transient memory pressure?
> 
> Especially in the case where one process is connecting to many different
> vhost-user-blk instances, I could imagine power-ons and incoming
> migrations racing with backend restarts quite frequently. Should
> these cases cause failures?
> 
> We can still hit the infinite looping case you describe post-realize.
> Why should we treat pre-realize differently?

I think there is one main difference between realize() and later
operation, which is that we can actually deliver an error to the user
during realize(). When we're just starting QEMU and processing the CLI
arguments, failure is very obvious, and in the context of QMP
device-add, the client is actively waiting for a result, too.

Later on, there is no good way to communicate an error (writes to stderr
just end up in some logfile at best, QAPI events can be missed), and
even if there were, we would have to do something with the guest until
the user/management application actually reacts to the error. The latter
is not a problem during realize() because the device wasn't even plugged
in yet.

So I think there are good reasons why it could make sense to distinguish
initialisation and operation.

> > Additionally, calling vhost_user_blk_disconnect() from the chardev event
> > handler could result in use-after-free because none of the
> > initialisation code expects that the device could just go away in the
> > middle. So removing the call fixes crashes in several places.
> > 
> > For example, using a num-queues setting that is incompatible with the
> > backend would result in a crash like this (dereferencing dev->opaque,
> > which is already NULL):
> > 
> >  #0  0x55d0a4bd in vhost_user_read_cb (source=0x568f4690, 
> > condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffcbf0) at 
> > ../hw/virtio/vhost-user.c:313
> >  #1  0x55d950d3 in qio_channel_fd_source_dispatch 
> > (source=0x57c3f750, callback=0x55d0a478 , 
> > user_data=0x7fffcbf0) at ../io/channel-watch.c:84
> >  #2  0x77b32a9f in g_main_context_dispatch () at 
> > /lib64/libglib-2.0.so.0
> >  #3  0x77b84a98 in g_main_context_iterate.constprop () at 
> > /lib64/libglib-2.0.so.0
> >  #4  0x77b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
> >  #5  0x55d0a724 in vhost_user_read (dev=0x57bc62f8, 
> > msg=0x7fffcc50) at ../hw/virtio/vhost-user.c:402
> >  #6  0x55d0ee6b in vhost_user_get_config (dev=0x57bc62f8, 
> > config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
> >  #7  0x55d56d46 in vhost_dev_get_config (hdev=0x57bc62f8, 
> > config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
> >  #8  0x55cdd150 in vhost_user_blk_device_realize 
> > (dev=0x57bc60b0, errp=0x7fffcf90) at 
> > ../hw/block/vhost-user-blk.c:510
> >  #9  0x55d08f6d in virtio_device_realize (dev=0x57bc60b0, 
> > errp=0x7fffcff0) at ../hw/virtio/virtio.c:3660
> > 
> > Signed-off-by: Kevin Wolf 
> > ---
> >  hw/block/vhost-user-blk.c | 54 ++-
> >  1 file changed, 13 insertions(+), 41 deletions(-)
> > 
> > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> > index f5e9682703..e824b0a759 100644
> > --- a/hw/block/vhost-user-blk.c
> > +++ b/hw/block/vhost-user-blk.c
> > @@ -50,6 +50,8 @@ static const int user_feature_bits[] = {
> >  VHOST_INVALID_FEATURE_BIT
> >  };
> >  
> > +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
> > +
> >  static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t 
> > *config)
> >  {
> >  VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > @@ -362,19 +364,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
> >  vhost_dev_cleanup(&s->dev);
> >  }
> >  
> > -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
> > - bool realized);
> > -
> > -static void vhost_user_blk_event_realize(void *opaque, QEM

Re: [PATCH 1/5] vhost-user-blk: Don't reconnect during initialisation

2021-04-28 Thread Raphael Norwitz
On Wed, Apr 28, 2021 at 07:31:13PM +0200, Kevin Wolf wrote:
> Am 28.04.2021 um 18:52 hat Raphael Norwitz geschrieben:
> > Given what you've shown with the use-after-free, I agree the changes
> > need to be reverted. I'm a little uneasy about removing the reconnect
> > logic from the device realization completely though.
> > 
> > On Thu, Apr 22, 2021 at 07:02:17PM +0200, Kevin Wolf wrote:
> > > This is a partial revert of commits 77542d43149 and bc79c87bcde.
> > > 
> > > Usually, an error during initialisation means that the configuration was
> > > wrong. Reconnecting won't make the error go away, but just turn the
> > > error condition into an endless loop. Avoid this and return errors
> > > again.
> > > 
> > 
> > Is that nessesarily true? As I understand it the main usecases for
> > device reconnect are to allow a device backend to be restarted after a
> > failure or to allow the backend to be upgraded without restarting the
> > guest. I agree - misconfiguration could be a common cause of a device
> > backend crashing at realize time, but couldn't there be others? Maybe
> > transient memory pressure?
> > 
> > Especially in the case where one process is connecting to many different
> > vhost-user-blk instances, I could imagine power-ons and incoming
> > migrations racing with backend restarts quite frequently. Should
> > these cases cause failures?
> > 
> > We can still hit the infinite looping case you describe post-realize.
> > Why should we treat pre-realize differently?
> 
> I think there is one main difference between realize() and later
> operation, which is that we can actually deliver an error to the user
> during realize(). When we're just starting QEMU and processing the CLI
> arguments, failure is very obvious, and in the context of QMP
> device-add, the client is actively waiting for a result, too.
> 
> Later on, there is no good way to communicate an error (writes to stderr
> just end up in some logfile at best, QAPI events can be missed), and
> even if there were, we would have to do something with the guest until
> the user/management application actually reacts to the error. The latter
> is not a problem during realize() because the device wasn't even plugged
> in yet.
> 
> So I think there are good reasons why it could make sense to distinguish
> initialisation and operation.
>

Fair enough. I'm happy in this case, provided we remain resiliant
against one-off daemon restarts during realize.

> > > Additionally, calling vhost_user_blk_disconnect() from the chardev event
> > > handler could result in use-after-free because none of the
> > > initialisation code expects that the device could just go away in the
> > > middle. So removing the call fixes crashes in several places.
> > > 
> > > For example, using a num-queues setting that is incompatible with the
> > > backend would result in a crash like this (dereferencing dev->opaque,
> > > which is already NULL):
> > > 
> > >  #0  0x55d0a4bd in vhost_user_read_cb (source=0x568f4690, 
> > > condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffcbf0) at 
> > > ../hw/virtio/vhost-user.c:313
> > >  #1  0x55d950d3 in qio_channel_fd_source_dispatch 
> > > (source=0x57c3f750, callback=0x55d0a478 , 
> > > user_data=0x7fffcbf0) at ../io/channel-watch.c:84
> > >  #2  0x77b32a9f in g_main_context_dispatch () at 
> > > /lib64/libglib-2.0.so.0
> > >  #3  0x77b84a98 in g_main_context_iterate.constprop () at 
> > > /lib64/libglib-2.0.so.0
> > >  #4  0x77b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
> > >  #5  0x55d0a724 in vhost_user_read (dev=0x57bc62f8, 
> > > msg=0x7fffcc50) at ../hw/virtio/vhost-user.c:402
> > >  #6  0x55d0ee6b in vhost_user_get_config (dev=0x57bc62f8, 
> > > config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
> > >  #7  0x55d56d46 in vhost_dev_get_config (hdev=0x57bc62f8, 
> > > config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
> > >  #8  0x55cdd150 in vhost_user_blk_device_realize 
> > > (dev=0x57bc60b0, errp=0x7fffcf90) at 
> > > ../hw/block/vhost-user-blk.c:510
> > >  #9  0x55d08f6d in virtio_device_realize (dev=0x57bc60b0, 
> > > errp=0x7fffcff0) at ../hw/virtio/virtio.c:3660
> > > 
> > > Signed-off-by: Kevin Wolf 
> > > ---
> > >  hw/block/vhost-user-blk.c | 54 ++-
> > >  1 file changed, 13 insertions(+), 41 deletions(-)
> > > 
> > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> > > index f5e9682703..e824b0a759 100644
> > > --- a/hw/block/vhost-user-blk.c
> > > +++ b/hw/block/vhost-user-blk.c
> > > @@ -50,6 +50,8 @@ static const int user_feature_bits[] = {
> > >  VHOST_INVALID_FEATURE_BIT
> > >  };
> > >  
> > > +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
> > > +
> > >  static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t 
> > > *config)
> > >  {
> > > 

Re: [PATCH 1/5] vhost-user-blk: Don't reconnect during initialisation

2021-04-28 Thread Philippe Mathieu-Daudé
On 4/22/21 7:02 PM, Kevin Wolf wrote:
> This is a partial revert of commits 77542d43149 and bc79c87bcde.
> 
> Usually, an error during initialisation means that the configuration was
> wrong. Reconnecting won't make the error go away, but just turn the
> error condition into an endless loop. Avoid this and return errors
> again.
> 
> Additionally, calling vhost_user_blk_disconnect() from the chardev event
> handler could result in use-after-free because none of the
> initialisation code expects that the device could just go away in the

TIL initialisation wording.

> middle. So removing the call fixes crashes in several places.
> 
> For example, using a num-queues setting that is incompatible with the
> backend would result in a crash like this (dereferencing dev->opaque,
> which is already NULL):
> 
>  #0  0x55d0a4bd in vhost_user_read_cb (source=0x568f4690, 
> condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffcbf0) at 
> ../hw/virtio/vhost-user.c:313
>  #1  0x55d950d3 in qio_channel_fd_source_dispatch 
> (source=0x57c3f750, callback=0x55d0a478 , 
> user_data=0x7fffcbf0) at ../io/channel-watch.c:84
>  #2  0x77b32a9f in g_main_context_dispatch () at 
> /lib64/libglib-2.0.so.0
>  #3  0x77b84a98 in g_main_context_iterate.constprop () at 
> /lib64/libglib-2.0.so.0
>  #4  0x77b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
>  #5  0x55d0a724 in vhost_user_read (dev=0x57bc62f8, 
> msg=0x7fffcc50) at ../hw/virtio/vhost-user.c:402
>  #6  0x55d0ee6b in vhost_user_get_config (dev=0x57bc62f8, 
> config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
>  #7  0x55d56d46 in vhost_dev_get_config (hdev=0x57bc62f8, 
> config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
>  #8  0x55cdd150 in vhost_user_blk_device_realize (dev=0x57bc60b0, 
> errp=0x7fffcf90) at ../hw/block/vhost-user-blk.c:510
>  #9  0x55d08f6d in virtio_device_realize (dev=0x57bc60b0, 
> errp=0x7fffcff0) at ../hw/virtio/virtio.c:3660
> 
> Signed-off-by: Kevin Wolf 
> ---
>  hw/block/vhost-user-blk.c | 54 ++-
>  1 file changed, 13 insertions(+), 41 deletions(-)




Re: [PATCH 1/5] vhost-user-blk: Don't reconnect during initialisation

2021-04-29 Thread Kevin Wolf
Am 22.04.2021 um 19:02 hat Kevin Wolf geschrieben:
> This is a partial revert of commits 77542d43149 and bc79c87bcde.
> 
> Usually, an error during initialisation means that the configuration was
> wrong. Reconnecting won't make the error go away, but just turn the
> error condition into an endless loop. Avoid this and return errors
> again.
> 
> Additionally, calling vhost_user_blk_disconnect() from the chardev event
> handler could result in use-after-free because none of the
> initialisation code expects that the device could just go away in the
> middle. So removing the call fixes crashes in several places.
> 
> For example, using a num-queues setting that is incompatible with the
> backend would result in a crash like this (dereferencing dev->opaque,
> which is already NULL):
> 
>  #0  0x55d0a4bd in vhost_user_read_cb (source=0x568f4690, 
> condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffcbf0) at 
> ../hw/virtio/vhost-user.c:313
>  #1  0x55d950d3 in qio_channel_fd_source_dispatch 
> (source=0x57c3f750, callback=0x55d0a478 , 
> user_data=0x7fffcbf0) at ../io/channel-watch.c:84
>  #2  0x77b32a9f in g_main_context_dispatch () at 
> /lib64/libglib-2.0.so.0
>  #3  0x77b84a98 in g_main_context_iterate.constprop () at 
> /lib64/libglib-2.0.so.0
>  #4  0x77b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
>  #5  0x55d0a724 in vhost_user_read (dev=0x57bc62f8, 
> msg=0x7fffcc50) at ../hw/virtio/vhost-user.c:402
>  #6  0x55d0ee6b in vhost_user_get_config (dev=0x57bc62f8, 
> config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
>  #7  0x55d56d46 in vhost_dev_get_config (hdev=0x57bc62f8, 
> config=0x57bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
>  #8  0x55cdd150 in vhost_user_blk_device_realize (dev=0x57bc60b0, 
> errp=0x7fffcf90) at ../hw/block/vhost-user-blk.c:510
>  #9  0x55d08f6d in virtio_device_realize (dev=0x57bc60b0, 
> errp=0x7fffcff0) at ../hw/virtio/virtio.c:3660
> 
> Signed-off-by: Kevin Wolf 
> ---
>  hw/block/vhost-user-blk.c | 54 ++-
>  1 file changed, 13 insertions(+), 41 deletions(-)
> 
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index f5e9682703..e824b0a759 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -50,6 +50,8 @@ static const int user_feature_bits[] = {
>  VHOST_INVALID_FEATURE_BIT
>  };
>  
> +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
> +
>  static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
>  {
>  VHostUserBlk *s = VHOST_USER_BLK(vdev);
> @@ -362,19 +364,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
>  vhost_dev_cleanup(&s->dev);
>  }
>  
> -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
> - bool realized);
> -
> -static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event)
> -{
> -vhost_user_blk_event(opaque, event, false);
> -}
> -
> -static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event)
> -{
> -vhost_user_blk_event(opaque, event, true);
> -}
> -
>  static void vhost_user_blk_chr_closed_bh(void *opaque)
>  {
>  DeviceState *dev = opaque;
> @@ -382,12 +371,11 @@ static void vhost_user_blk_chr_closed_bh(void *opaque)
>  VHostUserBlk *s = VHOST_USER_BLK(vdev);
>  
>  vhost_user_blk_disconnect(dev);
> -qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL,
> -vhost_user_blk_event_oper, NULL, opaque, NULL, true);
> +qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
> + NULL, opaque, NULL, true);
>  }
>  
> -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
> - bool realized)
> +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
>  {
>  DeviceState *dev = opaque;
>  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -401,17 +389,7 @@ static void vhost_user_blk_event(void *opaque, 
> QEMUChrEvent event,
>  }
>  break;
>  case CHR_EVENT_CLOSED:
> -/*
> - * Closing the connection should happen differently on device
> - * initialization and operation stages.
> - * On initalization, we want to re-start vhost_dev initialization
> - * from the very beginning right away when the connection is closed,
> - * so we clean up vhost_dev on each connection closing.
> - * On operation, we want to postpone vhost_dev cleanup to let the
> - * other code perform its own cleanup sequence using vhost_dev data
> - * (e.g. vhost_dev_set_log).
> - */
> -if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) {
> +if (!runstate_check(RUN_STATE_SHUTDOWN)) {
>  /*
>   * A close event may happen during a read/writ

Re: [PATCH 1/5] vhost-user-blk: Don't reconnect during initialisation

2021-04-29 Thread Raphael Norwitz
On Thu, Apr 29, 2021 at 02:48:52PM +0200, Kevin Wolf wrote:
> So maybe patch 2 should come first and also fix the preexisting bug, and
> of course this patch needs a v2 that doesn't introduce the new instances
> of the bug.

Sounds good to me.

> 
> Kevin
>