date:20230115

Re: Call qemu_socketpair() instead of socketpair() when possible

2023-01-15 Thread Thomas Huth


On 16/01/2023 05.56, Guoyi Tu wrote:

As qemu_socketpair() was introduced in commit 3c63b4e9
("oslib-posix: Introduce qemu_socketpair()"), it's time
to replace the other existing socketpair() calls with
qemu_socketpair() if possible

Signed-off-by: Guoyi Tu 
---
  backends/tpm/tpm_emulator.c | 2 +-
  tests/qtest/dbus-display-test.c | 5 +++--
  tests/qtest/migration-test.c    | 2 +-
  tests/unit/test-crypto-tlssession.c | 4 ++--
  tests/unit/test-io-channel-tls.c    | 2 +-
  5 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/backends/tpm/tpm_emulator.c b/backends/tpm/tpm_emulator.c
index 49cc3d749d..67e7b212e3 100644
--- a/backends/tpm/tpm_emulator.c
+++ b/backends/tpm/tpm_emulator.c
@@ -553,7 +553,7 @@ static int tpm_emulator_prepare_data_fd(TPMEmulator 
*tpm_emu)

  Error *err = NULL;
  int fds[2] = { -1, -1 };

-    if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) {
+    if (qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) {
  error_report("tpm-emulator: Failed to create socketpair");
  return -1;
  }
diff --git a/tests/qtest/dbus-display-test.c b/tests/qtest/dbus-display-test.c
index cb1b62d1d1..fef025ac6f 100644
--- a/tests/qtest/dbus-display-test.c
+++ b/tests/qtest/dbus-display-test.c
@@ -1,5 +1,6 @@
  #include "qemu/osdep.h"
  #include "qemu/dbus.h"
+#include "qemu/sockets.h"
  #include 
  #include 
  #include "libqtest.h"
@@ -36,7 +37,7 @@ test_setup(QTestState **qts, GDBusConnection **conn)

  *qts = qtest_init("-display dbus,p2p=yes -name dbus-test");

-    g_assert_cmpint(socketpair(AF_UNIX, SOCK_STREAM, 0, pair), ==, 0);
+    g_assert_cmpint(qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, pair), ==, 0);

  qtest_qmp_add_client(*qts, "@dbus-display", pair[1]);

@@ -152,7 +153,7 @@ test_dbus_display_console(void)

  test_setup(&qts, &conn);

-    g_assert_cmpint(socketpair(AF_UNIX, SOCK_STREAM, 0, pair), ==, 0);
+    g_assert_cmpint(qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, pair), ==, 0);
  fd_list = g_unix_fd_list_new();
  idx = g_unix_fd_list_append(fd_list, pair[1], NULL);

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index dbde726adf..1dd32c9506 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -1661,7 +1661,7 @@ static void *test_migrate_fd_start_hook(QTestState *from,
  int pair[2];

  /* Create two connected sockets for migration */
-    ret = socketpair(PF_LOCAL, SOCK_STREAM, 0, pair);
+    ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair);
  g_assert_cmpint(ret, ==, 0);

  /* Send the 1st socket to the target */
diff --git a/tests/unit/test-crypto-tlssession.c 
b/tests/unit/test-crypto-tlssession.c

index 615a1344b4..b12e7b6879 100644
--- a/tests/unit/test-crypto-tlssession.c
+++ b/tests/unit/test-crypto-tlssession.c
@@ -82,7 +82,7 @@ static void test_crypto_tls_session_psk(void)
  int ret;

  /* We'll use this for our fake client-server connection */
-    ret = socketpair(AF_UNIX, SOCK_STREAM, 0, channel);
+    ret = qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, channel);
  g_assert(ret == 0);

  /*
@@ -236,7 +236,7 @@ static void test_crypto_tls_session_x509(const void 
*opaque)

  int ret;

  /* We'll use this for our fake client-server connection */
-    ret = socketpair(AF_UNIX, SOCK_STREAM, 0, channel);
+    ret = qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, channel);
  g_assert(ret == 0);

  /*
diff --git a/tests/unit/test-io-channel-tls.c 
b/tests/unit/test-io-channel-tls.c

index cc39247556..e036ac5df4 100644
--- a/tests/unit/test-io-channel-tls.c
+++ b/tests/unit/test-io-channel-tls.c
@@ -121,7 +121,7 @@ static void test_io_channel_tls(const void *opaque)
  GMainContext *mainloop;

  /* We'll use this for our fake client-server connection */
-    g_assert(socketpair(AF_UNIX, SOCK_STREAM, 0, channel) == 0);
+    g_assert(qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, channel) == 0);

  #define CLIENT_CERT_DIR "tests/test-io-channel-tls-client/"
  #define SERVER_CERT_DIR "tests/test-io-channel-tls-server/"


Acked-by: Thomas Huth

RE: [PULL v4 76/83] vhost-user: Support vhost_dev_start

2023-01-15 Thread Yajun Wu

Not quite sure about the whole picture.

It seems that while QEMU is waiting for the response to vhost_user_get_status,
DPDK sends out VHOST_USER_SLAVE_IOTLB_MSG, which triggers the QEMU function
vhost_backend_update_device_iotlb.
QEMU then waits for the reply to VHOST_USER_IOTLB_MSG but gets the
VHOST_USER_GET_STATUS reply instead.

Break on first error message("Received unexpected msg type. Expected 22 
received 40")

#0  0x55b72ed4 in process_message_reply (dev=0x584dd600, 
msg=0x7fffa330) at ../hw/virtio/vhost-user.c:445
#1  0x55b77c26 in vhost_user_send_device_iotlb_msg (dev=0x584dd600, 
imsg=0x7fffa600) at ../hw/virtio/vhost-user.c:2341
#2  0x55b7179e in vhost_backend_update_device_iotlb 
(dev=0x584dd600, iova=10442706944, uaddr=140736119902208, len=4096, 
perm=IOMMU_RW) at ../hw/virtio/vhost-backend.c:361
#3  0x55b6e34c in vhost_device_iotlb_miss (dev=0x584dd600, 
iova=10442706944, write=1) at ../hw/virtio/vhost.c:1113
#4  0x55b718d9 in vhost_backend_handle_iotlb_msg (dev=0x584dd600, 
imsg=0x7fffa7b0) at ../hw/virtio/vhost-backend.c:393
#5  0x55b76144 in slave_read (ioc=0x57a38680, condition=G_IO_IN, 
opaque=0x584dd600) at ../hw/virtio/vhost-user.c:1726
#6  0x55c797a5 in qio_channel_fd_source_dispatch 
(source=0x56a06fb0, callback=0x55b75f86 , 
user_data=0x584dd600) at ../io/channel-watch.c:84
#7  0x7554895d in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
#8  0x75548d18 in g_main_context_iterate.isra () at 
/lib64/libglib-2.0.so.0
#9  0x75549042 in g_main_loop_run () at /lib64/libglib-2.0.so.0
#10 0x55b72de7 in vhost_user_read (dev=0x584dd600, 
msg=0x7fffac50) at ../hw/virtio/vhost-user.c:413
#11 0x55b72e9b in process_message_reply (dev=0x584dd600, 
msg=0x7fffaf10) at ../hw/virtio/vhost-user.c:439
#12 0x55b77c26 in vhost_user_send_device_iotlb_msg (dev=0x584dd600, 
imsg=0x7fffb1e0) at ../hw/virtio/vhost-user.c:2341
#13 0x55b7179e in vhost_backend_update_device_iotlb 
(dev=0x584dd600, iova=10468392960, uaddr=140736145588224, len=4096, 
perm=IOMMU_RW) at ../hw/virtio/vhost-backend.c:361
#14 0x55b6e34c in vhost_device_iotlb_miss (dev=0x584dd600, 
iova=10468392960, write=1) at ../hw/virtio/vhost.c:1113
#15 0x55b718d9 in vhost_backend_handle_iotlb_msg (dev=0x584dd600, 
imsg=0x7fffb390) at ../hw/virtio/vhost-backend.c:393
#16 0x55b76144 in slave_read (ioc=0x57a38680, condition=G_IO_IN, 
opaque=0x584dd600) at ../hw/virtio/vhost-user.c:1726
#17 0x55c797a5 in qio_channel_fd_source_dispatch 
(source=0x56c70250, callback=0x55b75f86 , 
user_data=0x584dd600) at ../io/channel-watch.c:84
#18 0x7554895d in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
#19 0x75548d18 in g_main_context_iterate.isra () at 
/lib64/libglib-2.0.so.0
#20 0x75549042 in g_main_loop_run () at /lib64/libglib-2.0.so.0
#21 0x55b72de7 in vhost_user_read (dev=0x584dd600, 
msg=0x7fffb830) at ../hw/virtio/vhost-user.c:413
#22 0x55b72e9b in process_message_reply (dev=0x584dd600, 
msg=0x7fffbaf0) at ../hw/virtio/vhost-user.c:439
#23 0x55b77c26 in vhost_user_send_device_iotlb_msg (dev=0x584dd600, 
imsg=0x7fffbdc0) at ../hw/virtio/vhost-user.c:2341
#24 0x55b7179e in vhost_backend_update_device_iotlb 
(dev=0x584dd600, iova=10442702848, uaddr=140736119898112, len=4096, 
perm=IOMMU_RW) at ../hw/virtio/vhost-backend.c:361
#25 0x55b6e34c in vhost_device_iotlb_miss (dev=0x584dd600, 
iova=10442702848, write=1) at ../hw/virtio/vhost.c:1113
#26 0x55b718d9 in vhost_backend_handle_iotlb_msg (dev=0x584dd600, 
imsg=0x7fffbf70) at ../hw/virtio/vhost-backend.c:393
#27 0x55b76144 in slave_read (ioc=0x57a38680, condition=G_IO_IN, 
opaque=0x584dd600) at ../hw/virtio/vhost-user.c:1726
#28 0x55c797a5 in qio_channel_fd_source_dispatch 
(source=0x56f1a530, callback=0x55b75f86 , 
user_data=0x584dd600) at ../io/channel-watch.c:84
#29 0x7554895d in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
#30 0x75548d18 in g_main_context_iterate.isra () at 
/lib64/libglib-2.0.so.0
#31 0x75549042 in g_main_loop_run () at /lib64/libglib-2.0.so.0
#32 0x55b72de7 in vhost_user_read (dev=0x584dd600, 
msg=0x7fffc420) at ../hw/virtio/vhost-user.c:413
#33 0x55b754b1 in vhost_user_get_u64 (dev=0x584dd600, request=40, 
u64=0x7fffc6e0) at ../hw/virtio/vhost-user.c:1349
#34 0x55b758ff in vhost_user_get_status (dev=0x584dd600, 
status=0x7fffc713 "W\020") at ../hw/virtio/vhost-user.c:1474
#35 0x55b75967 in vhost_user_add_status (dev=0x584dd600, status=7 
'\a') at ../hw/virtio/vhost-user.c:1488
#36 0x55b78bf6 in vhost_user_dev_start (dev=0x584dd600, 
started=true) at ../hw/virtio/vhost-user.c:2758
#37 0x55b709ad in

Re: [PATCH v2] Fix exec migration on Windows (w32+w64).

2023-01-15 Thread Marc-André Lureau

Hi

On Mon, Jan 16, 2023 at 5:35 AM John Berberian, Jr  wrote:
>
> * Use cmd instead of /bin/sh on Windows.
>
> * Try to auto-detect cmd.exe's path, but default to a hard-coded path.
>
> Note that this will require that gspawn-win[32|64]-helper.exe and
> gspawn-win[32|64]-helper-console.exe are included in the Windows binary
> distributions (cc: Stefan Weil).
>
> Signed-off-by: "John Berberian, Jr" 
> ---
> Whoops, forgot a header. Here's a revised patch.
>
>  migration/exec.c | 24 
>  1 file changed, 24 insertions(+)
>
> diff --git a/migration/exec.c b/migration/exec.c
> index 375d2e1b54..38604d73a6 100644
> --- a/migration/exec.c
> +++ b/migration/exec.c
> @@ -23,12 +23,31 @@
>  #include "migration.h"
>  #include "io/channel-command.h"
>  #include "trace.h"
> +#include "qemu/cutils.h"
>
> +#ifdef WIN32
> +const char *exec_get_cmd_path(void);
> +const char *exec_get_cmd_path(void)
> +{
> +g_autofree char *detected_path = g_new(char, MAX_PATH);
> +if (GetSystemDirectoryA(detected_path, MAX_PATH) == 0) {
> +warn_report("Could not detect cmd.exe path, using default.");
> +return "C:\\Windows\\System32\\cmd.exe";
> +}
> +pstrcat(detected_path, MAX_PATH, "\\cmd.exe");
> +return g_steal_pointer(&detected_path);
> +}
> +#endif
>
>  void exec_start_outgoing_migration(MigrationState *s, const char *command, 
> Error **errp)
>  {
>  QIOChannel *ioc;
> +
> +#ifdef WIN32
> +const char *argv[] = { exec_get_cmd_path(), "/c", command, NULL };
> +#else
>  const char *argv[] = { "/bin/sh", "-c", command, NULL };
> +#endif

It may be a better idea to use g_shell_parse_argv() instead.

>
>  trace_migration_exec_outgoing(command);
>  ioc = QIO_CHANNEL(qio_channel_command_new_spawn(argv,
> @@ -55,7 +74,12 @@ static gboolean exec_accept_incoming_migration(QIOChannel 
> *ioc,
>  void exec_start_incoming_migration(const char *command, Error **errp)
>  {
>  QIOChannel *ioc;
> +
> +#ifdef WIN32
> +const char *argv[] = { exec_get_cmd_path(), "/c", command, NULL };
> +#else
>  const char *argv[] = { "/bin/sh", "-c", command, NULL };
> +#endif
>
>  trace_migration_exec_incoming(command);
>  ioc = QIO_CHANNEL(qio_channel_command_new_spawn(argv,
> --
> 2.39.0
>
>


-- 
Marc-André Lureau

Re: [PATCH 0/3] Fix UNMAP notifier for intel-iommu

2023-01-15 Thread Jason Wang

On Mon, Jan 16, 2023 at 7:30 AM Viktor Prutyanov  wrote:
>
> On Tue, Nov 29, 2022 at 11:10 AM Jason Wang  wrote:
> >
> > Hi All:
> >
> > According to ATS, device should work if ATS is disabled. This is not
> > correctly implemented in the current intel-iommu since it doesn't
> > handle the UNMAP notifier correctly. This breaks the vhost-net +
> > vIOMMU without dt.
> >
> > > The root cause is that when there's a device IOTLB miss (note that
> > > it's not specific to PCI so it can work without ATS), Qemu doesn't
> > > build the IOVA tree, so when the guest starts an IOTLB invalidation, Qemu
> > > won't trigger the UNMAP notifier.
> >
> > > Fix this by building the IOVA tree during IOMMU translation.
> >
> > Thanks
> >
> > Jason Wang (3):
> >   intel-iommu: fail MAP notifier without caching mode
> >   intel-iommu: fail DEVIOTLB_UNMAP without dt mode
> >   intel-iommu: build iova tree during IOMMU translation
> >
> >  hw/i386/intel_iommu.c | 58 ---
> >  1 file changed, 33 insertions(+), 25 deletions(-)
> >
> > --
> > 2.25.1
> >
>
> Hi Jason,
>
> I've tried the series with a Windows Server 2022 guest with vhost and
> intel-iommu (device-iotlb=off), and networking on this system now
> works.
> So, as we discussed, I'm waiting for the series to be accepted in some
> form to continue my work about supporting guests who refuse Device-TLB
> on systems with device-iotlb=on.
>
> Tested-by: Viktor Prutyanov 

Great, Peter has some comments on this series, so I will probably send
a new version (probably after the chinese new year).

Thanks

>
> Best regards,
> Viktor Prutyanov
>

Re: [RFC v2 12/13] vdpa: preemptive kick at enable

2023-01-15 Thread Jason Wang




在 2023/1/13 17:06, Eugenio Perez Martin 写道:

On Fri, Jan 13, 2023 at 4:39 AM Jason Wang  wrote:

On Fri, Jan 13, 2023 at 11:25 AM Zhu, Lingshan  wrote:



On 1/13/2023 10:31 AM, Jason Wang wrote:

On Fri, Jan 13, 2023 at 1:27 AM Eugenio Pérez  wrote:

Spuriously kick the destination device's queue so it knows in case there
are new descriptors.

RFC: This is somehow a gray area. The guest may have placed descriptors
in a virtqueue but not kicked it, so it might be surprised if the device
starts processing it.

So I think this is kind of the work of the vDPA parent. For the parent
that needs this trick, we should do it in the parent driver.

Agree, it looks easier implementing this in parent driver,
I can implement it in ifcvf set_vq_ready right now

Great, but please check whether or not it is really needed.

Some device implementations could check the available descriptors
after DRIVER_OK without waiting for a kick.


So IIUC we can entirely drop this from the series (and I hope we can).
But then, what about the devices that do *not* check for them?



It needs mediation in the vDPA parent driver.




If we drop it it seems to me we must mandate devices to check for
descriptors at queue_enable. The queue could stall if not, isn't it?



I'm not sure, did you see a real issue with this? (Note that we don't do 
this for vhost-user-(vDPA))


Btw, the code can result in a kick before DRIVER_OK, which seems racy.

Thanks




Thanks!


Thanks


Thanks
Zhu Lingshan

Thanks


However, that information is not in the migration stream and it should
be an edge case anyhow, being resilient to parallel notifications from
the guest.

Signed-off-by: Eugenio Pérez 
---
   hw/virtio/vhost-vdpa.c | 5 +
   1 file changed, 5 insertions(+)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 40b7e8706a..dff94355dd 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -732,11 +732,16 @@ static int vhost_vdpa_set_vring_ready(struct vhost_dev 
*dev, int ready)
   }
   trace_vhost_vdpa_set_vring_ready(dev);
   for (i = 0; i < dev->nvqs; ++i) {
+VirtQueue *vq;
   struct vhost_vring_state state = {
   .index = dev->vq_index + i,
   .num = 1,
   };
   vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
+
+/* Preemptive kick */
+vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
+event_notifier_set(virtio_queue_get_host_notifier(vq));
   }
   return 0;
   }
--
2.31.1

Re: [RFC v2 11/13] vdpa: add vdpa net migration state notifier

2023-01-15 Thread Jason Wang




在 2023/1/13 17:00, Eugenio Perez Martin 写道:

On Fri, Jan 13, 2023 at 5:55 AM Jason Wang  wrote:

On Fri, Jan 13, 2023 at 1:25 AM Eugenio Pérez  wrote:

This allows net to restart the device backend to configure SVQ on it.

Ideally, these changes should not be net specific. However, the vdpa net
backend is the one with enough knowledge to configure everything because
of some reasons:
* Queues might need to be shadowed or not depending on its kind (control
   vs data).
* Queues need to share the same map translations (iova tree).

Because of that it is cleaner to restart the whole net backend and
configure again as expected, similar to how vhost-kernel moves between
userspace and passthrough.

If more kinds of devices need dynamic switching to SVQ we can create a
callback struct like VhostOps and move most of the code there.
VhostOps cannot be reused since all vdpa backend share them, and to
personalize just for networking would be too heavy.

Signed-off-by: Eugenio Pérez 
---
  net/vhost-vdpa.c | 84 
  1 file changed, 84 insertions(+)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 5d7ad6e4d7..f38532b1df 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -26,6 +26,8 @@
  #include 
  #include "standard-headers/linux/virtio_net.h"
  #include "monitor/monitor.h"
+#include "migration/migration.h"
+#include "migration/misc.h"
  #include "migration/blocker.h"
  #include "hw/virtio/vhost.h"

@@ -33,6 +35,7 @@
  typedef struct VhostVDPAState {
  NetClientState nc;
  struct vhost_vdpa vhost_vdpa;
+Notifier migration_state;
  Error *migration_blocker;
  VHostNetState *vhost_net;

@@ -243,10 +246,86 @@ static VhostVDPAState 
*vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
  return DO_UPCAST(VhostVDPAState, nc, nc0);
  }

+static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
+{
+struct vhost_vdpa *v = &s->vhost_vdpa;
+VirtIONet *n;
+VirtIODevice *vdev;
+int data_queue_pairs, cvq, r;
+NetClientState *peer;
+
+/* We are only called on the first data vqs and only if x-svq is not set */
+if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
+return;
+}
+
+vdev = v->dev->vdev;
+n = VIRTIO_NET(vdev);
+if (!n->vhost_started) {
+return;
+}
+
+if (enable) {
+ioctl(v->device_fd, VHOST_VDPA_SUSPEND);

Do we need to check if the device is started or not here?


v->vhost_started is checked right above, right?



Right, I missed that.





+}

I'm not sure I understand the reason for vhost_net_stop() after a
VHOST_VDPA_SUSPEND. It looks to me those functions are duplicated.


I think this is really worth exploring, and it would have been clearer
if I didn't squash the vhost_reset_status commit by mistake :).

Looking at qemu master vhost.c:vhost_dev_stop:
 if (hdev->vhost_ops->vhost_dev_start) {
 hdev->vhost_ops->vhost_dev_start(hdev, false);
 }
 if (vrings) {
 vhost_dev_set_vring_enable(hdev, false);
 }
 for (i = 0; i < hdev->nvqs; ++i) {
 vhost_virtqueue_stop(hdev,
  vdev,
  hdev->vqs + i,
  hdev->vq_index + i);
 }

Both vhost-user and vhost-vdpa set_status(0) at
->vhost_dev_start(hdev, false). It cleans virtqueue state in vdpa so
they are not recoverable at vhost_virtqueue_stop->get_vring_base, and
I think it is too late for vdpa devices to change it. I guess
vhost-user devices do not lose the state there, but I did not test.

I call VHOST_VDPA_SUSPEND here so vhost_vdpa_dev_start looks more
similar to vhost_user_dev_start. We can make
vhost_vdpa_dev_start(false) to suspend the device instead. But then we
need to reset it after getting the indexes. That's why I added
vhost_vdpa_reset_status, but I admit it is neither the cleanest
approach nor the best name to it.



I wonder if we can simply suspend in vhost_net_stop() if we know the 
parent can stop?


Thanks




Adding Maxime, RFC here so we can make -vdpa and -user not diverge too much.


+data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
+cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
+  n->max_ncs - n->max_queue_pairs : 0;
+vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
+
+peer = s->nc.peer;
+for (int i = 0; i < data_queue_pairs + cvq; i++) {
+VhostVDPAState *vdpa_state;
+NetClientState *nc;
+
+if (i < data_queue_pairs) {
+nc = qemu_get_peer(peer, i);
+} else {
+nc = qemu_get_peer(peer, n->max_queue_pairs);
+}
+
+vdpa_state = DO_UPCAST(VhostVDPAState, nc, nc);
+vdpa_state->vhost_vdpa.shadow_data = enable;
+
+if (i < data_queue_pairs) {
+/* Do not override CVQ shadow_vqs_enabled */
+vdpa_state->vhost_vdpa.shadow_vqs_enabled = enable;
+}
+}
+
+r =

Re: [RFC v2 08/13] vdpa: Negotiate _F_SUSPEND feature

2023-01-15 Thread Jason Wang




在 2023/1/13 16:45, Eugenio Perez Martin 写道:

On Fri, Jan 13, 2023 at 5:39 AM Jason Wang  wrote:

On Fri, Jan 13, 2023 at 1:25 AM Eugenio Pérez  wrote:

This is needed for qemu to know it can suspend the device to retrieve
its status and enable SVQ with it, so all the process is transparent to
the guest.

Signed-off-by: Eugenio Pérez 

Acked-by: Jason Wang 

We probably need to add the resume in the future to have a quick
recovery from migration failures.


The capability of a resume can be useful here but only in a small
window. During most of the migration SVQ is enabled, so in
the event of a migration failure we may need to reset the whole device
to enable passthrough again.



Yes.




But maybe it is worth giving a quick review and adding some TODOs
where it can be useful in this series?



We can start by having a TODO in this series, and leave resume in for 
the future.


Thanks




Thanks!


Thanks


---
  hw/virtio/vhost-vdpa.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 4296427a69..a61a6b2a74 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -659,7 +659,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
  uint64_t features;
  uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
  0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
-0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
+0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
+0x1ULL << VHOST_BACKEND_F_SUSPEND;
  int r;

  if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
--
2.31.1

Re: [PATCH 1/7] hw/acpi: Add missing includes

2023-01-15 Thread Markus Armbruster

Bernhard Beschow  writes:

> When removing the "hw/boards.h" include from
> hw/acpi/acpi_dev_interface.h, these include directives must be added to make
> the code compile again.
>
> Signed-off-by: Bernhard Beschow 

You don't actually remove #include "hw/boards.h" from
hw/acpi/acpi_dev_interface.h in this series.  Accident?

Re: [PATCH v2 01/21] hw/block: Rename TYPE_PFLASH_CFI02 'width' property as 'device-width'

2023-01-15 Thread Markus Armbruster

Peter Maydell  writes:

> On Mon, 9 Jan 2023 at 14:19, Philippe Mathieu-Daudé  wrote:
>>
>> On 9/1/23 14:33, BALATON Zoltan wrote:
>> > On Mon, 9 Jan 2023, Philippe Mathieu-Daudé wrote:
>> >> Use the same property name as the TYPE_PFLASH_CFI01 model.
>> >
>> > Nothing uses it? Can this break command lines and if so do we need
>> > deprecation or some compatibility function until everybody changed their
>> > usage?
>>
>> Good point... I missed that :/
>
> That should not be possible, because the cfi02 device
> is a sysbus device that must be mapped into memory. There's
> no useful way to use it on the QEMU commandline; the only
> users are those creating it from C code within QEMU.

I'd say beware of -global, but "fortunately" cfi.pflash01 cannot work
with it, since its '.' sabotages the -global's syntax.

Related prior discussion in the cover letter of "[PATCH RFC 0/1] QOM
type names and QAPI" and the replies to it:

Message-Id: <20210129081519.3848145-1-arm...@redhat.com>
https://lists.gnu.org/archive/html/qemu-devel/2021-01/msg07541.html

The patch there became commit e178113ff6 "hw: Replace anti-social QOM
type names".

[...]

Re: [RFC v2 06/13] vhost: delay set_vring_ready after DRIVER_OK

2023-01-15 Thread Jason Wang




在 2023/1/13 16:19, Eugenio Perez Martin 写道:

On Fri, Jan 13, 2023 at 5:36 AM Jason Wang  wrote:

On Fri, Jan 13, 2023 at 1:25 AM Eugenio Pérez  wrote:

To restore the device at the destination of a live migration we send the
commands through control virtqueue. For a device to read CVQ it must
have received the DRIVER_OK status bit.

This probably requires the support from the parent driver and requires
some changes or fixes in the parent driver.

Some drivers did:

parent_set_status():
if (DRIVER_OK)
 if (queue_enable)
 write queue_enable to the device

Examples are IFCVF or even vp_vdpa at least. MLX5 seems to be fine.


I don't get your point here. No device should start reading CVQ (or
any other VQ) without having received DRIVER_OK.



If I understand the code correctly:

For CVQ, we do SET_VRING_ENABLE before DRIVER_OK, that's fine.

For datapath_vq, we do SET_VRING_ENABLE after DRIVER_OK, this requires 
parent driver support (explained above)





Some parent drivers do not support sending the queue enable command
after DRIVER_OK, usually because they clean part of the state, like the
one set by set_vring_base. Even vdpa_net_sim needs fixes here.



Yes, so the question is:

Do we need another backend feature for this? (otherwise things may break 
silently)





But my understanding is that it should be supported so I consider it a
bug.



Probably, we need to find some proof in the spec, e.g. in 3.1.1:

"""

7.Perform device-specific setup, including discovery of virtqueues for 
the device, optional per-bus setup, reading and possibly writing the 
device’s virtio configuration space, and population of virtqueues.

8.Set the DRIVER_OK status bit. At this point the device is “live”.

"""

So if my understanding is correct, "discovery of virtqueues for the 
device" implies queue_enable here, which is expected to be done before 
DRIVER_OK. But it doesn't say anything regarding the behaviour of 
setting queue ready after DRIVER_OK.


I'm not sure whether it's a real bug or not; maybe Michael can comment on this.



  Especially after queue_reset patches. Is that what you mean?



We haven't supported queue_reset yet in Qemu. But it allows writing 1 
to queue_enable after DRIVER_OK for sure.






However this opens a window where the device could start receiving
packets in rx queue 0 before it receives the RSS configuration. To avoid
that, we will not send vring_enable until all configuration is used by
the device.

As a first step, run vhost_set_vring_ready for all vhost_net backend after
all of them are started (with DRIVER_OK). This code should not affect
vdpa.

Signed-off-by: Eugenio Pérez 
---
  hw/net/vhost_net.c | 17 -
  1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index c4eecc6f36..3900599465 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -399,6 +399,18 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
  } else {
  peer = qemu_get_peer(ncs, n->max_queue_pairs);
  }
+r = vhost_net_start_one(get_vhost_net(peer), dev);
+if (r < 0) {
+goto err_start;
+}
+}
+
+for (int j = 0; j < nvhosts; j++) {
+if (j < data_queue_pairs) {
+peer = qemu_get_peer(ncs, j);
+} else {
+peer = qemu_get_peer(ncs, n->max_queue_pairs);
+}

I fail to understand why we need to change the vhost_net layer? This
is vhost-vDPA specific, so I wonder if we can limit the changes to e.g
vhost_vdpa_dev_start()?


The vhost-net layer explicitly calls vhost_set_vring_enable before
vhost_dev_start, and this is exactly the behavior we want to avoid.
Even if we make changes to vhost_dev, this change is still needed.



Note that the only user of vhost_set_vring_enable() is vhost-user where 
the semantic is different:


It uses that to change the number of active queues:

static int peer_attach(VirtIONet *n, int index)

        if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
=>  vhost_set_vring_enable(nc->peer, 1);
    }

This is not the semantics of vhost-vDPA, which tries to be compliant with the 
virtio spec. So I'm not sure how it can help here.





And we want to explicitly enable CVQ first, and "only" vhost_net
knows which one it is.



This should be known by net/vhost-vdpa.c.



To perform that in vhost_vdpa_dev_start would require
quirks, involving one or more of:
* Ignore vq enable calls if the device is not the CVQ one. How to
signal what is the CVQ? Can we trust it will be the last one for all
kinds of devices?
* Enable queues that do not belong to the last vhost_dev from the enable call.
* Enable the rest of the queues from the last enable in reverse order.
* Intercalate the "net load" callback between enabling the last
vhost_vdpa device and enabling the rest of devices.
* Add an "enable priority" order?



I haven't had time to think it through, but it would be better if we could 
limit the changes to vhost-vdpa.

Re: [PATCH 2/2] target/riscv: Trap on writes to stimecmp from VS when hvictl.VTI=1

2023-01-15 Thread Alistair Francis

On Fri, Dec 16, 2022 at 8:46 AM Andrew Bresticker  wrote:
>
> Per the AIA specification, writes to stimecmp from VS level should
> trap when hvictl.VTI is set since the write may cause vsip.STIP to
> become unset.
>
> Fixes: 3ec0fe18a31f ("target/riscv: Add vstimecmp support")
> Signed-off-by: Andrew Bresticker 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
>  target/riscv/csr.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 984548bf87..7d9035e7bb 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -935,6 +935,9 @@ static RISCVException write_stimecmp(CPURISCVState *env, 
> int csrno,
>  RISCVCPU *cpu = env_archcpu(env);
>
>  if (riscv_cpu_virt_enabled(env)) {
> +if (env->hvictl & HVICTL_VTI) {
> +return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
> +}
>  return write_vstimecmp(env, csrno, val);
>  }
>
> @@ -955,6 +958,9 @@ static RISCVException write_stimecmph(CPURISCVState *env, 
> int csrno,
>  RISCVCPU *cpu = env_archcpu(env);
>
>  if (riscv_cpu_virt_enabled(env)) {
> +if (env->hvictl & HVICTL_VTI) {
> +return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
> +}
>  return write_vstimecmph(env, csrno, val);
>  }
>
> --
> 2.25.1
>
>

Re: [PATCH v2 2/5] target/riscv: Update VS timer whenever htimedelta changes

2023-01-15 Thread Anup Patel

Hi Alistair,

On Tue, Jan 3, 2023 at 9:43 PM Anup Patel  wrote:
>
> Hi Alistair,
>
> On Wed, Dec 28, 2022 at 11:08 AM Alistair Francis  
> wrote:
> >
> > On Fri, Dec 23, 2022 at 11:14 PM Anup Patel  wrote:
> > >
> > > On Thu, Dec 15, 2022 at 8:55 AM Alistair Francis  
> > > wrote:
> > > >
> > > > On Mon, Dec 12, 2022 at 9:12 PM Anup Patel  
> > > > wrote:
> > > > >
> > > > > On Mon, Dec 12, 2022 at 11:23 AM Alistair Francis 
> > > > >  wrote:
> > > > > >
> > > > > > On Thu, Dec 8, 2022 at 6:41 PM Anup Patel  
> > > > > > wrote:
> > > > > > >
> > > > > > > On Thu, Dec 8, 2022 at 9:00 AM Alistair Francis 
> > > > > > >  wrote:
> > > > > > > >
> > > > > > > > On Tue, Nov 8, 2022 at 11:07 PM Anup Patel 
> > > > > > > >  wrote:
> > > > > > > > >
> > > > > > > > > The htimedelta[h] CSR has impact on the VS timer comparison 
> > > > > > > > > so we
> > > > > > > > > should call riscv_timer_write_timecmp() whenever htimedelta 
> > > > > > > > > changes.
> > > > > > > > >
> > > > > > > > > > Fixes: 3ec0fe18a31f ("target/riscv: Add vstimecmp support")
> > > > > > > > > Signed-off-by: Anup Patel 
> > > > > > > > > Reviewed-by: Alistair Francis 
> > > > > > > >
> > > > > > > > This patch breaks my Xvisor test. When running OpenSBI and 
> > > > > > > > Xvisor like this:
> > > > > > > >
> > > > > > > > qemu-system-riscv64 -machine virt \
> > > > > > > > -m 1G -serial mon:stdio -serial null -nographic \
> > > > > > > > -append 'vmm.console=uart@1000 vmm.bootcmd="vfs mount 
> > > > > > > > initrd
> > > > > > > > /;vfs run /boot.xscript;vfs cat /system/banner.txt; guest kick 
> > > > > > > > guest0;
> > > > > > > > vserial bind guest0/uart0"' \
> > > > > > > > -smp 4 -d guest_errors \
> > > > > > > > -bios none \
> > > > > > > > -device 
> > > > > > > > loader,file=./images/qemuriscv64/vmm.bin,addr=0x8020 \
> > > > > > > > -kernel ./images/qemuriscv64/fw_jump.elf \
> > > > > > > > -initrd ./images/qemuriscv64/vmm-disk-linux.img -cpu 
> > > > > > > > rv64,h=true
> > > > > > > >
> > > > > > > > Running:
> > > > > > > >
> > > > > > > > Xvisor v0.3.0-129-gbc33f339 (Jan  1 1970 00:00:00)
> > > > > > > >
> > > > > > > > I see this failure:
> > > > > > > >
> > > > > > > > INIT: bootcmd:  guest kick guest0
> > > > > > > >
> > > > > > > > guest0: Kicked
> > > > > > > >
> > > > > > > > INIT: bootcmd:  vserial bind guest0/uart0
> > > > > > > >
> > > > > > > > [guest0/uart0] cpu_vcpu_stage2_map: 
> > > > > > > > guest_phys=0x3B9AC000
> > > > > > > > size=0x4096 map failed
> > > > > > > >
> > > > > > > > do_error: CPU3: VCPU=guest0/vcpu0 page fault failed (error -1)
> > > > > > > >
> > > > > > > >zero=0x  ra=0x80001B4E
> > > > > > > >
> > > > > > > >  sp=0x8001CF80  gp=0x
> > > > > > > >
> > > > > > > >  tp=0x  s0=0x8001CFB0
> > > > > > > >
> > > > > > > >  s1=0x  a0=0x10001048
> > > > > > > >
> > > > > > > >  a1=0x  a2=0x00989680
> > > > > > > >
> > > > > > > >  a3=0x3B9ACA00  a4=0x0048
> > > > > > > >
> > > > > > > >  a5=0x  a6=0x00019000
> > > > > > > >
> > > > > > > >  a7=0x  s2=0x
> > > > > > > >
> > > > > > > >  s3=0x  s4=0x
> > > > > > > >
> > > > > > > >  s5=0x  s6=0x
> > > > > > > >
> > > > > > > >  s7=0x  s8=0x
> > > > > > > >
> > > > > > > >  s9=0x s10=0x
> > > > > > > >
> > > > > > > > s11=0x  t0=0x4000
> > > > > > > >
> > > > > > > >  t1=0x0100  t2=0x
> > > > > > > >
> > > > > > > >  t3=0x  t4=0x
> > > > > > > >
> > > > > > > >  t5=0x  t6=0x
> > > > > > > >
> > > > > > > >sepc=0x80001918 sstatus=0x00024120
> > > > > > > >
> > > > > > > > hstatus=0x0002002001C0 sp_exec=0x10A64000
> > > > > > > >
> > > > > > > >  scause=0x0017   stval=0x3B9ACAF8
> > > > > > > >
> > > > > > > >   htval=0x0EE6B2BE  htinst=0x00D03021
> > > > > > > >
> > > > > > > > I have tried updating to a newer Xvisor release, but with that 
> > > > > > > > I don't
> > > > > > > > get any serial output.
> > > > > > > >
> > > > > > > > Can you help get the Xvisor tests back up and running?
> > > > > > >
> > > > > > > I tried the latest Xvisor-next 
> > > > > > > (https://github.com/avpatel/xvisor-next)
> > > > > > > with your QEMU riscv-to-apply.next branch and it works fine (both
> > > > > > > with and without Sstc).
> > > > > >
> >

Re: [RFC v2 05/13] vdpa net: add migration blocker if cannot migrate cvq

2023-01-15 Thread Michael S. Tsirkin

On Mon, Jan 16, 2023 at 11:34:20AM +0800, Jason Wang wrote:
> 
> 在 2023/1/13 15:46, Eugenio Perez Martin 写道:
> > On Fri, Jan 13, 2023 at 5:25 AM Jason Wang  wrote:
> > > 
> > > 在 2023/1/13 01:24, Eugenio Pérez 写道:
> > > > A vdpa net device must initialize with SVQ in order to be migratable,
> > > > and initialization code verifies conditions.  If the device is not
> > > > initialized with the x-svq parameter, it will not expose _F_LOG so vhost
> > > > subsystem will block VM migration from its initialization.
> > > > 
> > > > Next patches change this. Net data VQs will be shadowed only at
> > > > migration time and vdpa net devices need to expose _F_LOG as long as it
> > > > can go to SVQ.
> > > > 
> > > > Since we don't know that at initialization time but at start, add an
> > > > independent blocker at CVQ.
> > > > 
> > > > Signed-off-by: Eugenio Pérez 
> > > > ---
> > > >net/vhost-vdpa.c | 35 +--
> > > >1 file changed, 29 insertions(+), 6 deletions(-)
> > > > 
> > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > index 631424d9c4..2ca93e850a 100644
> > > > --- a/net/vhost-vdpa.c
> > > > +++ b/net/vhost-vdpa.c
> > > > @@ -26,12 +26,14 @@
> > > >#include 
> > > >#include "standard-headers/linux/virtio_net.h"
> > > >#include "monitor/monitor.h"
> > > > +#include "migration/blocker.h"
> > > >#include "hw/virtio/vhost.h"
> > > > 
> > > >/* Todo:need to add the multiqueue support here */
> > > >typedef struct VhostVDPAState {
> > > >NetClientState nc;
> > > >struct vhost_vdpa vhost_vdpa;
> > > > +Error *migration_blocker;
> > > 
> > > Any reason we can't use the migration_blocker in vhost_dev structure?
> > > 
> > > I believe we don't need to wait until start to know we can't migrate.
> > > 
> > Device migratability also depends on features that the guest acks.
> 
> 
> This sounds a little bit tricky, more below:
> 
> 
> > 
> > For example, if the device does not support ASID it can be migrated as
> > long as _F_CVQ is not acked.
> 
> 
> The management may notice a non-consistent behavior in this case. I wonder
> if we can simply check the host features.
> 
> Thanks


Yes the issue is that ack can happen after migration started.
I don't think this kind of blocker appearing during migration
is currently expected/supported well. Is it?

> 
> > 
> > Thanks!
> > 
> > > Thanks
> > > 
> > > 
> > > >VHostNetState *vhost_net;
> > > > 
> > > >/* Control commands shadow buffers */
> > > > @@ -433,9 +435,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState 
> > > > *nc)
> > > >g_strerror(errno), errno);
> > > >return -1;
> > > >}
> > > > -if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
> > > > -!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
> > > > -return 0;
> > > > +if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
> > > > +error_setg(>migration_blocker,
> > > > +   "vdpa device %s does not support ASID",
> > > > +   nc->name);
> > > > +goto out;
> > > > +}
> > > > +if (!vhost_vdpa_net_valid_svq_features(v->dev->features,
> > > > +   >migration_blocker)) {
> > > > +goto out;
> > > >}
> > > > 
> > > >/*
> > > > @@ -455,7 +463,10 @@ static int vhost_vdpa_net_cvq_start(NetClientState 
> > > > *nc)
> > > >}
> > > > 
> > > >if (group == cvq_group) {
> > > > -return 0;
> > > > +error_setg(>migration_blocker,
> > > > +"vdpa %s vq %d group %"PRId64" is the same as cvq 
> > > > group "
> > > > +"%"PRId64, nc->name, i, group, cvq_group);
> > > > +goto out;
> > > >}
> > > >}
> > > > 
> > > > @@ -468,8 +479,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState 
> > > > *nc)
> > > >s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
> > > > 
> > > >out:
> > > > -if (!s->vhost_vdpa.shadow_vqs_enabled) {
> > > > -return 0;
> > > > +if (s->migration_blocker) {
> > > > +Error *errp = NULL;
> > > > +r = migrate_add_blocker(s->migration_blocker, );
> > > > +if (unlikely(r != 0)) {
> > > > +g_clear_pointer(>migration_blocker, error_free);
> > > > +error_report_err(errp);
> > > > +}
> > > > +
> > > > +return r;
> > > >}
> > > > 
> > > >s0 = vhost_vdpa_net_first_nc_vdpa(s);
> > > > @@ -513,6 +531,11 @@ static void vhost_vdpa_net_cvq_stop(NetClientState 
> > > > *nc)
> > > >vhost_vdpa_cvq_unmap_buf(>vhost_vdpa, s->status);
> > > >}
> > > > 
> > > > +if (s->migration_blocker) {
> > > > +migrate_del_blocker(s->migration_blocker);
> > > > +g_clear_pointer(>migration_blocker, error_free);
> > > > +}
> > > > +
> > > >

Call qemu_socketpair() instead of socketpair() when possible

2023-01-15 Thread Guoyi Tu


As qemu_socketpair() was introduced in commit 3c63b4e9
("oslib-posix: Introduce qemu_socketpair()"), it's time
to replace the other existing socketpair() calls with
qemu_socketpair() if possible

Signed-off-by: Guoyi Tu 
---
 backends/tpm/tpm_emulator.c | 2 +-
 tests/qtest/dbus-display-test.c | 5 +++--
 tests/qtest/migration-test.c| 2 +-
 tests/unit/test-crypto-tlssession.c | 4 ++--
 tests/unit/test-io-channel-tls.c| 2 +-
 5 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/backends/tpm/tpm_emulator.c b/backends/tpm/tpm_emulator.c
index 49cc3d749d..67e7b212e3 100644
--- a/backends/tpm/tpm_emulator.c
+++ b/backends/tpm/tpm_emulator.c
@@ -553,7 +553,7 @@ static int tpm_emulator_prepare_data_fd(TPMEmulator 
*tpm_emu)

 Error *err = NULL;
 int fds[2] = { -1, -1 };

-if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) {
+if (qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) {
 error_report("tpm-emulator: Failed to create socketpair");
 return -1;
 }
diff --git a/tests/qtest/dbus-display-test.c 
b/tests/qtest/dbus-display-test.c

index cb1b62d1d1..fef025ac6f 100644
--- a/tests/qtest/dbus-display-test.c
+++ b/tests/qtest/dbus-display-test.c
@@ -1,5 +1,6 @@
 #include "qemu/osdep.h"
 #include "qemu/dbus.h"
+#include "qemu/sockets.h"
 #include 
 #include 
 #include "libqtest.h"
@@ -36,7 +37,7 @@ test_setup(QTestState **qts, GDBusConnection **conn)

 *qts = qtest_init("-display dbus,p2p=yes -name dbus-test");

-g_assert_cmpint(socketpair(AF_UNIX, SOCK_STREAM, 0, pair), ==, 0);
+g_assert_cmpint(qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, pair), ==, 0);

 qtest_qmp_add_client(*qts, "@dbus-display", pair[1]);

@@ -152,7 +153,7 @@ test_dbus_display_console(void)

 test_setup(, );

-g_assert_cmpint(socketpair(AF_UNIX, SOCK_STREAM, 0, pair), ==, 0);
+g_assert_cmpint(qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, pair), ==, 0);
 fd_list = g_unix_fd_list_new();
 idx = g_unix_fd_list_append(fd_list, pair[1], NULL);

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index dbde726adf..1dd32c9506 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -1661,7 +1661,7 @@ static void *test_migrate_fd_start_hook(QTestState 
*from,

 int pair[2];

 /* Create two connected sockets for migration */
-ret = socketpair(PF_LOCAL, SOCK_STREAM, 0, pair);
+ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair);
 g_assert_cmpint(ret, ==, 0);

 /* Send the 1st socket to the target */
diff --git a/tests/unit/test-crypto-tlssession.c 
b/tests/unit/test-crypto-tlssession.c

index 615a1344b4..b12e7b6879 100644
--- a/tests/unit/test-crypto-tlssession.c
+++ b/tests/unit/test-crypto-tlssession.c
@@ -82,7 +82,7 @@ static void test_crypto_tls_session_psk(void)
 int ret;

 /* We'll use this for our fake client-server connection */
-ret = socketpair(AF_UNIX, SOCK_STREAM, 0, channel);
+ret = qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, channel);
 g_assert(ret == 0);

 /*
@@ -236,7 +236,7 @@ static void test_crypto_tls_session_x509(const void 
*opaque)

 int ret;

 /* We'll use this for our fake client-server connection */
-ret = socketpair(AF_UNIX, SOCK_STREAM, 0, channel);
+ret = qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, channel);
 g_assert(ret == 0);

 /*
diff --git a/tests/unit/test-io-channel-tls.c 
b/tests/unit/test-io-channel-tls.c

index cc39247556..e036ac5df4 100644
--- a/tests/unit/test-io-channel-tls.c
+++ b/tests/unit/test-io-channel-tls.c
@@ -121,7 +121,7 @@ static void test_io_channel_tls(const void *opaque)
 GMainContext *mainloop;

 /* We'll use this for our fake client-server connection */
-g_assert(socketpair(AF_UNIX, SOCK_STREAM, 0, channel) == 0);
+g_assert(qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, channel) == 0);

 #define CLIENT_CERT_DIR "tests/test-io-channel-tls-client/"
 #define SERVER_CERT_DIR "tests/test-io-channel-tls-server/"
--
2.25.1

Re: [PATCH 2/2] target/riscv: Trap on writes to stimecmp from VS when hvictl.VTI=1

2023-01-15 Thread Alistair Francis

On Fri, Dec 16, 2022 at 8:46 AM Andrew Bresticker  wrote:
>
> Per the AIA specification, writes to stimecmp from VS level should
> trap when hvictl.VTI is set since the write may cause vsip.STIP to
> become unset.
>
> Fixes: 3ec0fe18a31f ("target/riscv: Add vstimecmp support")
> Signed-off-by: Andrew Bresticker 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/csr.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 984548bf87..7d9035e7bb 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -935,6 +935,9 @@ static RISCVException write_stimecmp(CPURISCVState *env, 
> int csrno,
>  RISCVCPU *cpu = env_archcpu(env);
>
>  if (riscv_cpu_virt_enabled(env)) {
> +if (env->hvictl & HVICTL_VTI) {
> +return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
> +}
>  return write_vstimecmp(env, csrno, val);
>  }
>
> @@ -955,6 +958,9 @@ static RISCVException write_stimecmph(CPURISCVState *env, 
> int csrno,
>  RISCVCPU *cpu = env_archcpu(env);
>
>  if (riscv_cpu_virt_enabled(env)) {
> +if (env->hvictl & HVICTL_VTI) {
> +return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
> +}
>  return write_vstimecmph(env, csrno, val);
>  }
>
> --
> 2.25.1
>
>

Re: [PATCH 1/2] target/riscv: Fix up masking of vsip/vsie accesses

2023-01-15 Thread Alistair Francis

On Fri, Dec 16, 2022 at 8:46 AM Andrew Bresticker  wrote:
>
> The current logic attempts to shift the VS-level bits into their correct
> position in mip while leaving the remaining bits intact. This is both
> pointless and likely incorrect since one would expect that any new, future
> VS-level interrupts will get their own position in mip rather than sharing
> with their (H)S-level equivalent. Fix this, and make the logic more
> readable, by just masking off the VS-level bits and shifting them into
> position.
>
> This also fixes reads of vsip, which would only ever report vsip.VSSIP
> since the non-writable bits got masked off as well.
>
> Fixes: d028ac7512f1 ("target/riscv: Implement AIA CSRs for 64 local interrupts 
> on RV32")
> Signed-off-by: Andrew Bresticker 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/csr.c | 35 +++
>  1 file changed, 11 insertions(+), 24 deletions(-)
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 5c9a7ee287..984548bf87 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -1975,22 +1975,15 @@ static RISCVException rmw_vsie64(CPURISCVState *env, 
> int csrno,
>   uint64_t new_val, uint64_t wr_mask)
>  {
>  RISCVException ret;
> -uint64_t rval, vsbits, mask = env->hideleg & VS_MODE_INTERRUPTS;
> +uint64_t rval, mask = env->hideleg & VS_MODE_INTERRUPTS;
>
>  /* Bring VS-level bits to correct position */
> -vsbits = new_val & (VS_MODE_INTERRUPTS >> 1);
> -new_val &= ~(VS_MODE_INTERRUPTS >> 1);
> -new_val |= vsbits << 1;
> -vsbits = wr_mask & (VS_MODE_INTERRUPTS >> 1);
> -wr_mask &= ~(VS_MODE_INTERRUPTS >> 1);
> -wr_mask |= vsbits << 1;
> +new_val = (new_val & (VS_MODE_INTERRUPTS >> 1)) << 1;
> +wr_mask = (wr_mask & (VS_MODE_INTERRUPTS >> 1)) << 1;
>
>  ret = rmw_mie64(env, csrno, , new_val, wr_mask & mask);
>  if (ret_val) {
> -rval &= mask;
> -vsbits = rval & VS_MODE_INTERRUPTS;
> -rval &= ~VS_MODE_INTERRUPTS;
> -*ret_val = rval | (vsbits >> 1);
> +*ret_val = (rval & mask) >> 1;
>  }
>
>  return ret;
> @@ -2191,22 +2184,16 @@ static RISCVException rmw_vsip64(CPURISCVState *env, 
> int csrno,
>   uint64_t new_val, uint64_t wr_mask)
>  {
>  RISCVException ret;
> -uint64_t rval, vsbits, mask = env->hideleg & vsip_writable_mask;
> +uint64_t rval, mask = env->hideleg & VS_MODE_INTERRUPTS;
>
>  /* Bring VS-level bits to correct position */
> -vsbits = new_val & (VS_MODE_INTERRUPTS >> 1);
> -new_val &= ~(VS_MODE_INTERRUPTS >> 1);
> -new_val |= vsbits << 1;
> -vsbits = wr_mask & (VS_MODE_INTERRUPTS >> 1);
> -wr_mask &= ~(VS_MODE_INTERRUPTS >> 1);
> -wr_mask |= vsbits << 1;
> -
> -ret = rmw_mip64(env, csrno, , new_val, wr_mask & mask);
> +new_val = (new_val & (VS_MODE_INTERRUPTS >> 1)) << 1;
> +wr_mask = (wr_mask & (VS_MODE_INTERRUPTS >> 1)) << 1;
> +
> +ret = rmw_mip64(env, csrno, , new_val,
> +wr_mask & mask & vsip_writable_mask);
>  if (ret_val) {
> -rval &= mask;
> -vsbits = rval & VS_MODE_INTERRUPTS;
> -rval &= ~VS_MODE_INTERRUPTS;
> -*ret_val = rval | (vsbits >> 1);
> +*ret_val = (rval & mask) >> 1;
>  }
>
>  return ret;
> --
> 2.25.1
>
>

Re: [PATCH v7 3/3] hw/riscv: clear kernel_entry higher bits in load_elf_ram_sym()

2023-01-15 Thread Alistair Francis

On Sat, Jan 14, 2023 at 11:41 PM Bin Meng  wrote:
>
> On Sat, Jan 14, 2023 at 1:18 AM Daniel Henrique Barboza
>  wrote:
> >
> > Recent hw/riscv/boot.c changes caused a regression in a use case with
> > the Xvisor hypervisor. Running a 32 bit QEMU guest with '-kernel'
> > stopped working. The reason seems to be that Xvisor is using 64 bit to
> > encode the 32 bit addresses from the guest, and load_elf_ram_sym() is
> > sign-extending the result with '1's [1].
>
> I would say it's not a regression of QEMU but something weird happened
> to Alistair's 32-bit Xvisor image.

I don't think it's a Xvisor issue.

>
> I just built a 32-bit Xvisor image from the latest Xvisor head
> following the instructions provided in its source tree. With the
> mainline QEMU only BIN file boots, but ELF does not. My 32-bit Xvisor
> image has an address of 0x1000. Apparently this address is not
> correct, and the issue I saw is different from Alistair's. Alistair,
> could you investigate why your 32-bit Xvisor ELF image has an address
> of 0x8000 set to kernel_load_base?

Looking in load_elf() in include/hw/elf_ops.h at this line:

if (lowaddr)
*lowaddr = (uint64_t)(elf_sword)low;

I can see that `low` is 0x8000 but lowaddr is set to
0x8000. So the address is being sign extended with 1s.

This patch seems to be the correct fix.

Alistair

>
> >
> > This can very well be an issue with Xvisor, but since it's not hard to
> > amend it in our side we're going for it. Use a translate_fn() callback
> > to be called by load_elf_ram_sym() and clear the higher bits of the
> > result if we're running a 32 bit CPU.
> >
> > [1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html
> >
> > Suggested-by: Philippe Mathieu-Daudé 
> > Suggested-by: Bin Meng 
> > Signed-off-by: Daniel Henrique Barboza 

Thanks for the patch. This should be the first patch of the series
though, so that we never break guest loading.

> > ---
> >  hw/riscv/boot.c| 23 ++-
> >  hw/riscv/microchip_pfsoc.c |  4 ++--
> >  hw/riscv/opentitan.c   |  3 ++-
> >  hw/riscv/sifive_e.c|  3 ++-
> >  hw/riscv/sifive_u.c|  4 ++--
> >  hw/riscv/spike.c   |  2 +-
> >  hw/riscv/virt.c|  4 ++--
> >  include/hw/riscv/boot.h|  1 +
> >  8 files changed, 34 insertions(+), 10 deletions(-)
> >
> > diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
> > index e868fb6ade..7f8295bf5e 100644
> > --- a/hw/riscv/boot.c
> > +++ b/hw/riscv/boot.c
> > @@ -213,7 +213,27 @@ static void riscv_load_initrd(MachineState *machine, 
> > uint64_t kernel_entry)
> >  }
> >  }
> >
> > +static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
> > +{
> > +RISCVHartArrayState *harts = opaque;
> > +
> > +/*
> > + * For 32 bit CPUs, kernel_load_base is sign-extended (i.e.
> > + * it can be padded with '1's) if the hypervisor, for some
> > + * reason, is using 64 bit addresses with 32 bit guests.
> > + *
> > + * Clear the higher bits to avoid the padding if we're
> > + * running a 32 bit CPU.
> > + */
> > +if (riscv_is_32bit(harts)) {
> > +return addr & 0x0fff;
> > +}
> > +
> > +return addr;
> > +}
> > +
> >  target_ulong riscv_load_kernel(MachineState *machine,
> > +   RISCVHartArrayState *harts,
> > target_ulong kernel_start_addr,
> > bool load_initrd,
> > symbol_fn_t sym_cb)
> > @@ -231,7 +251,8 @@ target_ulong riscv_load_kernel(MachineState *machine,
> >   * the (expected) load address load address. This allows kernels to 
> > have
> >   * separate SBI and ELF entry points (used by FreeBSD, for example).
> >   */
> > -if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL,
> > +if (load_elf_ram_sym(kernel_filename, NULL,
> > + translate_kernel_address, NULL,
> >   NULL, _load_base, NULL, NULL, 0,
> >   EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {
> >  kernel_entry = kernel_load_base;
> > diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c
> > index c45023a2b1..b7e171b605 100644
> > --- a/hw/riscv/microchip_pfsoc.c
> > +++ b/hw/riscv/microchip_pfsoc.c
> > @@ -629,8 +629,8 @@ static void 
> > microchip_icicle_kit_machine_init(MachineState *machine)
> >  kernel_start_addr = riscv_calc_kernel_start_addr(>soc.u_cpus,
> >   
> > firmware_end_addr);
> >
> > -kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
> > - true, NULL);
> > +kernel_entry = riscv_load_kernel(machine, >soc.u_cpus,
> > + kernel_start_addr, true, NULL);
> >
> >  /* Compute the fdt load address in dram */
> >  fdt_load_addr = 
> >

Re: [PATCH v5 1/2] riscv: Pass Object to register_cpu_props instead of DeviceState

2023-01-15 Thread Alistair Francis

On Fri, Jan 13, 2023 at 8:36 PM Alexandre Ghiti  wrote:
>
> One can extract the DeviceState pointer from the Object pointer, so pass
> the Object for future commits to access other fields of Object.
>
> No functional changes intended.
>
> Signed-off-by: Alexandre Ghiti 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.c | 15 ---
>  1 file changed, 8 insertions(+), 7 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index cc75ca7667..7181b34f86 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -200,7 +200,7 @@ static const char * const riscv_intr_names[] = {
>  "reserved"
>  };
>
> -static void register_cpu_props(DeviceState *dev);
> +static void register_cpu_props(Object *obj);
>
>  const char *riscv_cpu_get_trap_name(target_ulong cause, bool async)
>  {
> @@ -238,7 +238,7 @@ static void riscv_any_cpu_init(Object *obj)
>  set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVU);
>  #endif
>  set_priv_version(env, PRIV_VERSION_1_12_0);
> -register_cpu_props(DEVICE(obj));
> +register_cpu_props(obj);
>  }
>
>  #if defined(TARGET_RISCV64)
> @@ -247,7 +247,7 @@ static void rv64_base_cpu_init(Object *obj)
>  CPURISCVState *env = _CPU(obj)->env;
>  /* We set this in the realise function */
>  set_misa(env, MXL_RV64, 0);
> -register_cpu_props(DEVICE(obj));
> +register_cpu_props(obj);
>  /* Set latest version of privileged specification */
>  set_priv_version(env, PRIV_VERSION_1_12_0);
>  }
> @@ -280,7 +280,7 @@ static void rv128_base_cpu_init(Object *obj)
>  CPURISCVState *env = _CPU(obj)->env;
>  /* We set this in the realise function */
>  set_misa(env, MXL_RV128, 0);
> -register_cpu_props(DEVICE(obj));
> +register_cpu_props(obj);
>  /* Set latest version of privileged specification */
>  set_priv_version(env, PRIV_VERSION_1_12_0);
>  }
> @@ -290,7 +290,7 @@ static void rv32_base_cpu_init(Object *obj)
>  CPURISCVState *env = _CPU(obj)->env;
>  /* We set this in the realise function */
>  set_misa(env, MXL_RV32, 0);
> -register_cpu_props(DEVICE(obj));
> +register_cpu_props(obj);
>  /* Set latest version of privileged specification */
>  set_priv_version(env, PRIV_VERSION_1_12_0);
>  }
> @@ -343,7 +343,7 @@ static void riscv_host_cpu_init(Object *obj)
>  #elif defined(TARGET_RISCV64)
>  set_misa(env, MXL_RV64, 0);
>  #endif
> -register_cpu_props(DEVICE(obj));
> +register_cpu_props(obj);
>  }
>  #endif
>
> @@ -1083,9 +1083,10 @@ static Property riscv_cpu_extensions[] = {
>  DEFINE_PROP_END_OF_LIST(),
>  };
>
> -static void register_cpu_props(DeviceState *dev)
> +static void register_cpu_props(Object *obj)
>  {
>  Property *prop;
> +DeviceState *dev = DEVICE(obj);
>
>  for (prop = riscv_cpu_extensions; prop && prop->name; prop++) {
>  qdev_property_add_static(dev, prop);
> --
> 2.37.2
>
>

Re: [PATCH 00/10] riscv: create_fdt() related cleanups

2023-01-15 Thread Alistair Francis

On Thu, Jan 12, 2023 at 3:12 AM Daniel Henrique Barboza
 wrote:
>
> Hi,
>
> This is a follow-up of:
>
> "[PATCH v5 00/11] riscv: OpenSBI boot test and cleanups"
>
> Patches were based on top of riscv-to-apply.next [1] + the series above.
>
> The recent FDT changes made in hw/riscv (all machines are now using the
> FDT via MachineState::fdt) allowed for most of the cleanups made here.
>
> Patches 9 and 10 were based on a suggestion made by Phil a few weeks ago.
> I decided to go for it.
>
> [1] https://github.com/alistair23/qemu/tree/riscv-to-apply.next
>
> Daniel Henrique Barboza (10):
>   hw/riscv/spike.c: simplify create_fdt()
>   hw/riscv/virt.c: simplify create_fdt()
>   hw/riscv/sifive_u.c: simplify create_fdt()
>   hw/riscv/virt.c: remove 'is_32_bit' param from
> create_fdt_socket_cpus()
>   hw/riscv: use MachineState::fdt in riscv_socket_fdt_write_id()
>   hw/riscv: use ms->fdt in riscv_socket_fdt_write_distance_matrix()
>   hw/riscv: simplify riscv_load_fdt()
>   hw/riscv/virt.c: calculate socket count once in create_fdt_imsic()
>   hw/riscv/virt.c: rename MachineState 'mc' pointers to 'ms'
>   hw/riscv/spike.c: rename MachineState 'mc' pointers to' ms'

Applied patches 1 to 6.

Alistair

>
>  hw/riscv/boot.c|   4 +-
>  hw/riscv/microchip_pfsoc.c |   4 +-
>  hw/riscv/numa.c|  14 +-
>  hw/riscv/sifive_u.c|  11 +-
>  hw/riscv/spike.c   |  25 +-
>  hw/riscv/virt.c| 484 +++--
>  include/hw/riscv/boot.h|   2 +-
>  include/hw/riscv/numa.h|  10 +-
>  8 files changed, 277 insertions(+), 277 deletions(-)
>
> --
> 2.39.0
>
>

Re: [PATCH 06/10] hw/riscv: use ms->fdt in riscv_socket_fdt_write_distance_matrix()

2023-01-15 Thread Alistair Francis

On Thu, Jan 12, 2023 at 3:26 AM Daniel Henrique Barboza
 wrote:
>
> There's no need to use a MachineState pointer and a fdt pointer now that
> all RISC-V machines are using the FDT from the MachineState.
>
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/numa.c | 8 
>  hw/riscv/spike.c| 2 +-
>  hw/riscv/virt.c | 2 +-
>  include/hw/riscv/numa.h | 4 ++--
>  4 files changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c
> index f4343f5cde..4720102561 100644
> --- a/hw/riscv/numa.c
> +++ b/hw/riscv/numa.c
> @@ -164,7 +164,7 @@ void riscv_socket_fdt_write_id(const MachineState *ms, 
> const char *node_name,
>  }
>  }
>
> -void riscv_socket_fdt_write_distance_matrix(const MachineState *ms, void 
> *fdt)
> +void riscv_socket_fdt_write_distance_matrix(const MachineState *ms)
>  {
>  int i, j, idx;
>  uint32_t *dist_matrix, dist_matrix_size;
> @@ -184,10 +184,10 @@ void riscv_socket_fdt_write_distance_matrix(const 
> MachineState *ms, void *fdt)
>  }
>  }
>
> -qemu_fdt_add_subnode(fdt, "/distance-map");
> -qemu_fdt_setprop_string(fdt, "/distance-map", "compatible",
> +qemu_fdt_add_subnode(ms->fdt, "/distance-map");
> +qemu_fdt_setprop_string(ms->fdt, "/distance-map", "compatible",
>  "numa-distance-map-v1");
> -qemu_fdt_setprop(fdt, "/distance-map", "distance-matrix",
> +qemu_fdt_setprop(ms->fdt, "/distance-map", "distance-matrix",
>   dist_matrix, dist_matrix_size);
>  g_free(dist_matrix);
>  }
> diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
> index 05d34651cb..91bf194ec1 100644
> --- a/hw/riscv/spike.c
> +++ b/hw/riscv/spike.c
> @@ -174,7 +174,7 @@ static void create_fdt(SpikeState *s, const MemMapEntry 
> *memmap,
>  g_free(clust_name);
>  }
>
> -riscv_socket_fdt_write_distance_matrix(mc, fdt);
> +riscv_socket_fdt_write_distance_matrix(mc);
>
>  qemu_fdt_add_subnode(fdt, "/chosen");
>  qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", "/htif");
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 1d3bd25cb5..e374b58f89 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -805,7 +805,7 @@ static void create_fdt_sockets(RISCVVirtState *s, const 
> MemMapEntry *memmap,
>  }
>  }
>
> -riscv_socket_fdt_write_distance_matrix(mc, mc->fdt);
> +riscv_socket_fdt_write_distance_matrix(mc);
>  }
>
>  static void create_fdt_virtio(RISCVVirtState *s, const MemMapEntry *memmap,
> diff --git a/include/hw/riscv/numa.h b/include/hw/riscv/numa.h
> index 634df6673f..8f5280211d 100644
> --- a/include/hw/riscv/numa.h
> +++ b/include/hw/riscv/numa.h
> @@ -100,9 +100,9 @@ void riscv_socket_fdt_write_id(const MachineState *ms, 
> const char *node_name,
>   * @ms: pointer to machine state
>   * @socket_id: socket index
>   *
> - * Write NUMA distance matrix in FDT for given machine
> + * Write NUMA distance matrix in MachineState->fdt
>   */
> -void riscv_socket_fdt_write_distance_matrix(const MachineState *ms, void 
> *fdt);
> +void riscv_socket_fdt_write_distance_matrix(const MachineState *ms);
>
>  CpuInstanceProperties
>  riscv_numa_cpu_index_to_props(MachineState *ms, unsigned cpu_index);
> --
> 2.39.0
>
>

Re: [PATCH 05/10] hw/riscv: use MachineState::fdt in riscv_socket_fdt_write_id()

2023-01-15 Thread Alistair Francis

On Thu, Jan 12, 2023 at 3:28 AM Daniel Henrique Barboza
 wrote:
>
> There's no need to use a MachineState pointer and a fdt pointer now that
> all RISC-V machines are using the FDT from the MachineState.
>
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/numa.c |  6 +++---
>  hw/riscv/spike.c|  6 +++---
>  hw/riscv/virt.c | 18 +-
>  include/hw/riscv/numa.h |  6 +++---
>  4 files changed, 18 insertions(+), 18 deletions(-)
>
> diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c
> index 7fe92d402f..f4343f5cde 100644
> --- a/hw/riscv/numa.c
> +++ b/hw/riscv/numa.c
> @@ -156,11 +156,11 @@ uint64_t riscv_socket_mem_size(const MachineState *ms, 
> int socket_id)
>  ms->numa_state->nodes[socket_id].node_mem : 0;
>  }
>
> -void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt,
> -   const char *node_name, int socket_id)
> +void riscv_socket_fdt_write_id(const MachineState *ms, const char *node_name,
> +   int socket_id)
>  {
>  if (numa_enabled(ms)) {
> -qemu_fdt_setprop_cell(fdt, node_name, "numa-node-id", socket_id);
> +qemu_fdt_setprop_cell(ms->fdt, node_name, "numa-node-id", socket_id);
>  }
>  }
>
> diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
> index 4a66016d69..05d34651cb 100644
> --- a/hw/riscv/spike.c
> +++ b/hw/riscv/spike.c
> @@ -121,7 +121,7 @@ static void create_fdt(SpikeState *s, const MemMapEntry 
> *memmap,
>  qemu_fdt_setprop_cell(fdt, cpu_name, "reg",
>  s->soc[socket].hartid_base + cpu);
>  qemu_fdt_setprop_string(fdt, cpu_name, "device_type", "cpu");
> -riscv_socket_fdt_write_id(mc, fdt, cpu_name, socket);
> +riscv_socket_fdt_write_id(mc, cpu_name, socket);
>  qemu_fdt_setprop_cell(fdt, cpu_name, "phandle", cpu_phandle);
>
>  intc_name = g_strdup_printf("%s/interrupt-controller", cpu_name);
> @@ -154,7 +154,7 @@ static void create_fdt(SpikeState *s, const MemMapEntry 
> *memmap,
>  qemu_fdt_setprop_cells(fdt, mem_name, "reg",
>  addr >> 32, addr, size >> 32, size);
>  qemu_fdt_setprop_string(fdt, mem_name, "device_type", "memory");
> -riscv_socket_fdt_write_id(mc, fdt, mem_name, socket);
> +riscv_socket_fdt_write_id(mc, mem_name, socket);
>  g_free(mem_name);
>
>  clint_addr = memmap[SPIKE_CLINT].base +
> @@ -167,7 +167,7 @@ static void create_fdt(SpikeState *s, const MemMapEntry 
> *memmap,
>  0x0, clint_addr, 0x0, memmap[SPIKE_CLINT].size);
>  qemu_fdt_setprop(fdt, clint_name, "interrupts-extended",
>  clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4);
> -riscv_socket_fdt_write_id(mc, fdt, clint_name, socket);
> +riscv_socket_fdt_write_id(mc, clint_name, socket);
>
>  g_free(clint_name);
>  g_free(clint_cells);
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 99a0a43a73..1d3bd25cb5 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -253,7 +253,7 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int 
> socket,
>  qemu_fdt_setprop_cell(mc->fdt, cpu_name, "reg",
>  s->soc[socket].hartid_base + cpu);
>  qemu_fdt_setprop_string(mc->fdt, cpu_name, "device_type", "cpu");
> -riscv_socket_fdt_write_id(mc, mc->fdt, cpu_name, socket);
> +riscv_socket_fdt_write_id(mc, cpu_name, socket);
>  qemu_fdt_setprop_cell(mc->fdt, cpu_name, "phandle", cpu_phandle);
>
>  intc_phandles[cpu] = (*phandle)++;
> @@ -291,7 +291,7 @@ static void create_fdt_socket_memory(RISCVVirtState *s,
>  qemu_fdt_setprop_cells(mc->fdt, mem_name, "reg",
>  addr >> 32, addr, size >> 32, size);
>  qemu_fdt_setprop_string(mc->fdt, mem_name, "device_type", "memory");
> -riscv_socket_fdt_write_id(mc, mc->fdt, mem_name, socket);
> +riscv_socket_fdt_write_id(mc, mem_name, socket);
>  g_free(mem_name);
>  }
>
> @@ -327,7 +327,7 @@ static void create_fdt_socket_clint(RISCVVirtState *s,
>  0x0, clint_addr, 0x0, memmap[VIRT_CLINT].size);
>  qemu_fdt_setprop(mc->fdt, clint_name, "interrupts-extended",
>  clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4);
> -riscv_socket_fdt_write_id(mc, mc->fdt, clint_name, socket);
> +riscv_socket_fdt_write_id(mc, clint_name, socket);
>  g_free(clint_name);
>
>  g_free(clint_cells);
> @@ -372,7 +372,7 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
>  aclint_mswi_cells, aclint_cells_size);
>  qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0);
>  qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0);
> -riscv_socket_fdt_write_id(mc, mc->fdt, name, socket);
> +riscv_socket_fdt_write_id(mc, name, socket);
>  g_free(name);
>  }
>
> @@ -396,7 +396,7 @@ static void

Re: [PATCH 04/10] hw/riscv/virt.c: remove 'is_32_bit' param from create_fdt_socket_cpus()

2023-01-15 Thread Alistair Francis

On Thu, Jan 12, 2023 at 3:22 AM Daniel Henrique Barboza
 wrote:
>
> create_fdt_socket_cpus() writes a different 'mmu-type' value if we're
> running in 32 or 64 bits. However, the flag is being calculated during
> virt_machine_init(), and is passed around in create_fdt(), then
> create_fdt_socket(), and then finally create_fdt_socket_cpus(). None of
> the intermediate functions are using the flag, which is a bit
> misleading.
>
> Remove 'is_32_bit' flag from create_fdt_socket_cpus() and calculate it
> using the already available RISCVVirtState pointer. This will also
> change the signature of create_fdt_socket() and create_fdt(), making it
> clear that these functions don't do anything special when we're running
> in 32 bit mode.
>
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/virt.c | 18 +-
>  1 file changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 89c99ec1af..99a0a43a73 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -223,12 +223,13 @@ static void create_pcie_irq_map(RISCVVirtState *s, void 
> *fdt, char *nodename,
>
>  static void create_fdt_socket_cpus(RISCVVirtState *s, int socket,
> char *clust_name, uint32_t *phandle,
> -   bool is_32_bit, uint32_t *intc_phandles)
> +   uint32_t *intc_phandles)
>  {
>  int cpu;
>  uint32_t cpu_phandle;
>  MachineState *mc = MACHINE(s);
>  char *name, *cpu_name, *core_name, *intc_name;
> +bool is_32_bit = riscv_is_32bit(>soc[0]);
>
>  for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
>  cpu_phandle = (*phandle)++;
> @@ -721,7 +722,7 @@ static void create_fdt_pmu(RISCVVirtState *s)
>  }
>
>  static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
> -   bool is_32_bit, uint32_t *phandle,
> +   uint32_t *phandle,
> uint32_t *irq_mmio_phandle,
> uint32_t *irq_pcie_phandle,
> uint32_t *irq_virtio_phandle,
> @@ -750,7 +751,7 @@ static void create_fdt_sockets(RISCVVirtState *s, const 
> MemMapEntry *memmap,
>  qemu_fdt_add_subnode(mc->fdt, clust_name);
>
>  create_fdt_socket_cpus(s, socket, clust_name, phandle,
> -is_32_bit, _phandles[phandle_pos]);
> +   _phandles[phandle_pos]);
>
>  create_fdt_socket_memory(s, memmap, socket);
>
> @@ -998,8 +999,7 @@ static void create_fdt_fw_cfg(RISCVVirtState *s, const 
> MemMapEntry *memmap)
>  g_free(nodename);
>  }
>
> -static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
> -   bool is_32_bit)
> +static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap)
>  {
>  MachineState *mc = MACHINE(s);
>  uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1;
> @@ -1031,9 +1031,9 @@ static void create_fdt(RISCVVirtState *s, const 
> MemMapEntry *memmap,
>  qemu_fdt_setprop_cell(mc->fdt, "/soc", "#size-cells", 0x2);
>  qemu_fdt_setprop_cell(mc->fdt, "/soc", "#address-cells", 0x2);
>
> -create_fdt_sockets(s, memmap, is_32_bit, ,
> -_mmio_phandle, _pcie_phandle, _virtio_phandle,
> -_pcie_phandle);
> +create_fdt_sockets(s, memmap, , _mmio_phandle,
> +   _pcie_phandle, _virtio_phandle,
> +   _pcie_phandle);
>
>  create_fdt_virtio(s, memmap, irq_virtio_phandle);
>
> @@ -1499,7 +1499,7 @@ static void virt_machine_init(MachineState *machine)
>  virt_flash_map(s, system_memory);
>
>  /* create device tree */
> -create_fdt(s, memmap, riscv_is_32bit(>soc[0]));
> +create_fdt(s, memmap);
>
>  s->machine_done.notify = virt_machine_done;
>  qemu_add_machine_init_done_notifier(>machine_done);
> --
> 2.39.0
>
>

Re: [PATCH 03/10] hw/riscv/sifive_u.c: simplify create_fdt()

2023-01-15 Thread Alistair Francis

On Thu, Jan 12, 2023 at 3:25 AM Daniel Henrique Barboza
 wrote:
>
> 'cmdline' isn't being used. Remove it.
>
> A MachineState pointer is being retrieved via a MACHINE() macro calling
> qdev_get_machine(). Use MACHINE(s) instead to avoid calling qdev().
>
>  'mem_size' is being set as machine->ram_size by the caller. Retrieve it
> via ms->ram_size.
>
> Cc: Palmer Dabbelt 
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/sifive_u.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
> index 9a75d4aa62..ccad386920 100644
> --- a/hw/riscv/sifive_u.c
> +++ b/hw/riscv/sifive_u.c
> @@ -94,9 +94,10 @@ static const MemMapEntry sifive_u_memmap[] = {
>  #define GEM_REVISION0x10070109
>
>  static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap,
> -   uint64_t mem_size, const char *cmdline, bool 
> is_32_bit)
> +   bool is_32_bit)
>  {
> -MachineState *ms = MACHINE(qdev_get_machine());
> +MachineState *ms = MACHINE(s);
> +uint64_t mem_size = ms->ram_size;
>  void *fdt;
>  int cpu, fdt_size;
>  uint32_t *cells;
> @@ -560,8 +561,7 @@ static void sifive_u_machine_init(MachineState *machine)
>qemu_allocate_irq(sifive_u_machine_reset, NULL, 
> 0));
>
>  /* create device tree */
> -create_fdt(s, memmap, machine->ram_size, machine->kernel_cmdline,
> -   riscv_is_32bit(>soc.u_cpus));
> +create_fdt(s, memmap, riscv_is_32bit(>soc.u_cpus));
>
>  if (s->start_in_flash) {
>  /*
> --
> 2.39.0
>
>

Re: [PATCH 02/10] hw/riscv/virt.c: simplify create_fdt()

2023-01-15 Thread Alistair Francis

On Thu, Jan 12, 2023 at 3:18 AM Daniel Henrique Barboza
 wrote:
>
> 'mem_size' and 'cmdline' aren't being used. Remove them.
>
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/virt.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index a931ed05ab..89c99ec1af 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -999,7 +999,7 @@ static void create_fdt_fw_cfg(RISCVVirtState *s, const 
> MemMapEntry *memmap)
>  }
>
>  static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
> -   uint64_t mem_size, const char *cmdline, bool 
> is_32_bit)
> +   bool is_32_bit)
>  {
>  MachineState *mc = MACHINE(s);
>  uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1;
> @@ -1499,8 +1499,7 @@ static void virt_machine_init(MachineState *machine)
>  virt_flash_map(s, system_memory);
>
>  /* create device tree */
> -create_fdt(s, memmap, machine->ram_size, machine->kernel_cmdline,
> -   riscv_is_32bit(>soc[0]));
> +create_fdt(s, memmap, riscv_is_32bit(>soc[0]));
>
>  s->machine_done.notify = virt_machine_done;
>  qemu_add_machine_init_done_notifier(>machine_done);
> --
> 2.39.0
>
>

Re: [PATCH 01/10] hw/riscv/spike.c: simplify create_fdt()

2023-01-15 Thread Alistair Francis

On Thu, Jan 12, 2023 at 3:21 AM Daniel Henrique Barboza
 wrote:
>
> 'mem_size' and 'cmdline' are unused.
>
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/spike.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
> index c517885e6e..4a66016d69 100644
> --- a/hw/riscv/spike.c
> +++ b/hw/riscv/spike.c
> @@ -49,7 +49,6 @@ static const MemMapEntry spike_memmap[] = {
>  };
>
>  static void create_fdt(SpikeState *s, const MemMapEntry *memmap,
> -   uint64_t mem_size, const char *cmdline,
> bool is_32_bit, bool htif_custom_base)
>  {
>  void *fdt;
> @@ -299,8 +298,7 @@ static void spike_board_init(MachineState *machine)
>  }
>
>  /* Create device tree */
> -create_fdt(s, memmap, machine->ram_size, machine->kernel_cmdline,
> -   riscv_is_32bit(>soc[0]), htif_custom_base);
> +create_fdt(s, memmap, riscv_is_32bit(>soc[0]), htif_custom_base);
>
>  /* Load kernel */
>  if (machine->kernel_filename) {
> --
> 2.39.0
>
>

Re: [PATCH] target/riscv: Use TARGET_FMT_lx for env->mhartid

2023-01-15 Thread Alistair Francis

On Tue, Jan 10, 2023 at 1:28 AM Bin Meng  wrote:
>
> env->mhartid is currently cast to long before being printed, which drops
> the high 32 bits for rv64 on a 32-bit host. Use TARGET_FMT_lx instead.
>
> Signed-off-by: Bin Meng 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
>
>  target/riscv/cpu.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index cc75ca7667..a5ed6d3f63 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -660,9 +660,9 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
> **errp)
>  (env->priv_ver < isa_edata_arr[i].min_version)) {
>  isa_ext_update_enabled(cpu, _edata_arr[i], false);
>  #ifndef CONFIG_USER_ONLY
> -warn_report("disabling %s extension for hart 0x%lx because "
> -"privilege spec version does not match",
> -isa_edata_arr[i].name, (unsigned long)env->mhartid);
> +warn_report("disabling %s extension for hart 0x" TARGET_FMT_lx
> +" because privilege spec version does not match",
> +isa_edata_arr[i].name, env->mhartid);
>  #else
>  warn_report("disabling %s extension because "
>  "privilege spec version does not match",
> --
> 2.34.1
>
>

Re: [RFC v2 05/13] vdpa net: add migration blocker if cannot migrate cvq

2023-01-15 Thread Jason Wang




在 2023/1/13 15:46, Eugenio Perez Martin 写道:

On Fri, Jan 13, 2023 at 5:25 AM Jason Wang  wrote:


在 2023/1/13 01:24, Eugenio Pérez 写道:

A vdpa net device must initialize with SVQ in order to be migratable,
and initialization code verifies conditions.  If the device is not
initialized with the x-svq parameter, it will not expose _F_LOG so vhost
subsystem will block VM migration from its initialization.

Next patches change this. Net data VQs will be shadowed only at
migration time and vdpa net devices need to expose _F_LOG as long as it
can go to SVQ.

Since we don't know that at initialization time but at start, add an
independent blocker at CVQ.

Signed-off-by: Eugenio Pérez 
---
   net/vhost-vdpa.c | 35 +--
   1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 631424d9c4..2ca93e850a 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -26,12 +26,14 @@
   #include 
   #include "standard-headers/linux/virtio_net.h"
   #include "monitor/monitor.h"
+#include "migration/blocker.h"
   #include "hw/virtio/vhost.h"

   /* Todo:need to add the multiqueue support here */
   typedef struct VhostVDPAState {
   NetClientState nc;
   struct vhost_vdpa vhost_vdpa;
+Error *migration_blocker;


Any reason we can't use the migration_blocker in the vhost_dev structure?

I believe we don't need to wait until start to know we can't migrate.


Device migratability also depends on features that the guest acks.



This sounds a little bit tricky, more below:




For example, if the device does not support ASID it can be migrated as
long as _F_CVQ is not acked.



The management may notice inconsistent behavior in this case. I
wonder if we can simply check the host features.


Thanks




Thanks!


Thanks



   VHostNetState *vhost_net;

   /* Control commands shadow buffers */
@@ -433,9 +435,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
   g_strerror(errno), errno);
   return -1;
   }
-if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
-!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
-return 0;
+if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
+error_setg(>migration_blocker,
+   "vdpa device %s does not support ASID",
+   nc->name);
+goto out;
+}
+if (!vhost_vdpa_net_valid_svq_features(v->dev->features,
+   >migration_blocker)) {
+goto out;
   }

   /*
@@ -455,7 +463,10 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
   }

   if (group == cvq_group) {
-return 0;
+error_setg(>migration_blocker,
+"vdpa %s vq %d group %"PRId64" is the same as cvq group "
+"%"PRId64, nc->name, i, group, cvq_group);
+goto out;
   }
   }

@@ -468,8 +479,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
   s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;

   out:
-if (!s->vhost_vdpa.shadow_vqs_enabled) {
-return 0;
+if (s->migration_blocker) {
+Error *errp = NULL;
+r = migrate_add_blocker(s->migration_blocker, );
+if (unlikely(r != 0)) {
+g_clear_pointer(>migration_blocker, error_free);
+error_report_err(errp);
+}
+
+return r;
   }

   s0 = vhost_vdpa_net_first_nc_vdpa(s);
@@ -513,6 +531,11 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
   vhost_vdpa_cvq_unmap_buf(>vhost_vdpa, s->status);
   }

+if (s->migration_blocker) {
+migrate_del_blocker(s->migration_blocker);
+g_clear_pointer(>migration_blocker, error_free);
+}
+
   vhost_vdpa_net_client_stop(nc);
   }

Re: [RFC v2 04/13] vdpa: rewind at get_base, not set_base

2023-01-15 Thread Jason Wang




在 2023/1/13 15:40, Eugenio Perez Martin 写道:

On Fri, Jan 13, 2023 at 5:10 AM Jason Wang  wrote:

On Fri, Jan 13, 2023 at 1:24 AM Eugenio Pérez  wrote:

At this moment it is only possible to migrate to a vdpa device running
with x-svq=on. As a protective measure, the rewind of the inflight
descriptors was done at the destination. That way if the source sent a
virtqueue with inuse descriptors they are always discarded.

Since this series also allows migrating to passthrough devices with no
SVQ, the right thing to do is to rewind at the source so that the bases of
the vrings are correct.

Support for inflight descriptors may be added in the future.

Signed-off-by: Eugenio Pérez 
---
  include/hw/virtio/vhost-backend.h |  4 +++
  hw/virtio/vhost-vdpa.c| 46 +++
  hw/virtio/vhost.c |  3 ++
  3 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/include/hw/virtio/vhost-backend.h 
b/include/hw/virtio/vhost-backend.h
index c5ab49051e..ec3fbae58d 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -130,6 +130,9 @@ typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);

  typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
 int fd);
+
+typedef void (*vhost_reset_status_op)(struct vhost_dev *dev);
+
  typedef struct VhostOps {
  VhostBackendType backend_type;
  vhost_backend_init vhost_backend_init;
@@ -177,6 +180,7 @@ typedef struct VhostOps {
  vhost_get_device_id_op vhost_get_device_id;
  vhost_force_iommu_op vhost_force_iommu;
  vhost_set_config_call_op vhost_set_config_call;
+vhost_reset_status_op vhost_reset_status;
  } VhostOps;

  int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 542e003101..28a52ddc78 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1132,14 +1132,23 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, 
bool started)
  if (started) {
  memory_listener_register(>listener, _space_memory);
  return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
-} else {
-vhost_vdpa_reset_device(dev);
-vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
-   VIRTIO_CONFIG_S_DRIVER);
-memory_listener_unregister(>listener);
+}

-return 0;
+return 0;
+}
+
+static void vhost_vdpa_reset_status(struct vhost_dev *dev)
+{
+struct vhost_vdpa *v = dev->opaque;
+
+if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
+return;
  }
+
+vhost_vdpa_reset_device(dev);
+vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
+VIRTIO_CONFIG_S_DRIVER);
+memory_listener_unregister(>listener);
  }

  static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
@@ -1182,18 +1191,7 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev 
*dev,
 struct vhost_vring_state *ring)
  {
  struct vhost_vdpa *v = dev->opaque;
-VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);

-/*
- * vhost-vdpa devices does not support in-flight requests. Set all of them
- * as available.
- *
- * TODO: This is ok for networking, but other kinds of devices might
- * have problems with these retransmissions.
- */
-while (virtqueue_rewind(vq, 1)) {
-continue;
-}
  if (v->shadow_vqs_enabled) {
  /*
   * Device vring base was set at device start. SVQ base is handled by
@@ -1212,6 +1210,19 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev 
*dev,
  int ret;

  if (v->shadow_vqs_enabled) {
+VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
+
+/*
+ * vhost-vdpa devices does not support in-flight requests. Set all of
+ * them as available.
+ *
+ * TODO: This is ok for networking, but other kinds of devices might
+ * have problems with these retransmissions.
+ */
+while (virtqueue_rewind(vq, 1)) {
+continue;
+}
+
  ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
  return 0;
  }
@@ -1326,4 +1337,5 @@ const VhostOps vdpa_ops = {
  .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
  .vhost_force_iommu = vhost_vdpa_force_iommu,
  .vhost_set_config_call = vhost_vdpa_set_config_call,
+.vhost_reset_status = vhost_vdpa_reset_status,

Can we simply use the NetClient stop method here?


Ouch, I squashed two patches by mistake here.

All the vhost_reset_status part should be independent of this patch,
and I was especially interested in its feedback. It had this message:

 vdpa: move vhost reset after get vring base

 The function vhost.c:vhost_dev_stop calls vhost operation
 vhost_dev_start(false). In the case of

[PATCH] hw/display/xlnx_dp: fix abort in xlnx_dp_change_graphic_fmt()

2023-01-15 Thread Qiang Liu

xlnx_dp_change_graphic_fmt() will directly abort if either graphic
format or the video format is not supported.

This patch makes xlnx_dp_change_graphic_fmt() return directly if the
formats are not supported.

xlnx_dp_change_graphic_fmt() has two callsites in xlnx_dp_avbufm_write()
and xlnx_dp_reset(). I think it may be OK to drop the abort in
xlnx_dp_change_graphic_fmt() because the error information will be
printed.

Fixes: 58ac482a66de ("introduce xlnx-dp")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1415
Reported-by: Qiang Liu 
Signed-off-by: Qiang Liu 
---
 hw/display/xlnx_dp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c
index b0828d65aa..407518c870 100644
--- a/hw/display/xlnx_dp.c
+++ b/hw/display/xlnx_dp.c
@@ -641,7 +641,7 @@ static void xlnx_dp_change_graphic_fmt(XlnxDPState *s)
 default:
 error_report("%s: unsupported graphic format %u", __func__,
  s->avbufm_registers[AV_BUF_FORMAT] & DP_GRAPHIC_MASK);
-abort();
+return;
 }
 
 switch (s->avbufm_registers[AV_BUF_FORMAT] & DP_NL_VID_FMT_MASK) {
@@ -657,7 +657,7 @@ static void xlnx_dp_change_graphic_fmt(XlnxDPState *s)
 default:
 error_report("%s: unsupported video format %u", __func__,
  s->avbufm_registers[AV_BUF_FORMAT] & DP_NL_VID_FMT_MASK);
-abort();
+return;
 }
 
 xlnx_dp_recreate_surface(s);
-- 
2.25.1

Re: [PATCH] target/riscv: Use TARGET_FMT_lx for env->mhartid

2023-01-15 Thread Alistair Francis

On Tue, Jan 10, 2023 at 1:28 AM Bin Meng  wrote:
>
> env->mhartid is currently cast to long before being printed, which drops
> the high 32 bits for rv64 on a 32-bit host. Use TARGET_FMT_lx instead.
>
> Signed-off-by: Bin Meng 

Reviewed-by: Alistair Francis 

Alistair

> ---
>
>  target/riscv/cpu.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index cc75ca7667..a5ed6d3f63 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -660,9 +660,9 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
> **errp)
>  (env->priv_ver < isa_edata_arr[i].min_version)) {
>  isa_ext_update_enabled(cpu, _edata_arr[i], false);
>  #ifndef CONFIG_USER_ONLY
> -warn_report("disabling %s extension for hart 0x%lx because "
> -"privilege spec version does not match",
> -isa_edata_arr[i].name, (unsigned long)env->mhartid);
> +warn_report("disabling %s extension for hart 0x" TARGET_FMT_lx
> +" because privilege spec version does not match",
> +isa_edata_arr[i].name, env->mhartid);
>  #else
>  warn_report("disabling %s extension because "
>  "privilege spec version does not match",
> --
> 2.34.1
>
>

[PATCH v2] hw/net/lan9118: log [read|write]b when mode_16bit is enabled rather than abort

2023-01-15 Thread Qiang Liu

This patch replaces hw_error with a guest error log for [read|write]b
accesses when mode_16bit is enabled. This avoids aborting qemu.

Fixes: 1248f8d4cbc3 ("hw/lan9118: Add basic 16-bit mode support.")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1433
Reported-by: Qiang Liu 
Signed-off-by: Qiang Liu 
Suggested-by: Philippe Mathieu-Daudé 
---
v2 removes BADF() macro and the "hw/hw.h" inclusion
---
 hw/net/lan9118.c | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c
index f1cba55967..e5c4af182d 100644
--- a/hw/net/lan9118.c
+++ b/hw/net/lan9118.c
@@ -15,7 +15,6 @@
 #include "migration/vmstate.h"
 #include "net/net.h"
 #include "net/eth.h"
-#include "hw/hw.h"
 #include "hw/irq.h"
 #include "hw/net/lan9118.h"
 #include "hw/ptimer.h"
@@ -32,12 +31,8 @@
 #ifdef DEBUG_LAN9118
 #define DPRINTF(fmt, ...) \
 do { printf("lan9118: " fmt , ## __VA_ARGS__); } while (0)
-#define BADF(fmt, ...) \
-do { hw_error("lan9118: error: " fmt , ## __VA_ARGS__);} while (0)
 #else
 #define DPRINTF(fmt, ...) do {} while(0)
-#define BADF(fmt, ...) \
-do { fprintf(stderr, "lan9118: error: " fmt , ## __VA_ARGS__);} while (0)
 #endif
 
 /* The tx and rx fifo ports are a range of aliased 32-bit registers */
@@ -848,7 +843,8 @@ static uint32_t do_phy_read(lan9118_state *s, int reg)
 case 30: /* Interrupt mask */
 return s->phy_int_mask;
 default:
-BADF("PHY read reg %d\n", reg);
+qemu_log_mask(LOG_GUEST_ERROR,
+  "do_phy_read: PHY read reg %d\n", reg);
 return 0;
 }
 }
@@ -876,7 +872,8 @@ static void do_phy_write(lan9118_state *s, int reg, 
uint32_t val)
 phy_update_irq(s);
 break;
 default:
-BADF("PHY write reg %d = 0x%04x\n", reg, val);
+qemu_log_mask(LOG_GUEST_ERROR,
+  "do_phy_write: PHY write reg %d = 0x%04x\n", reg, val);
 }
 }
 
@@ -1209,7 +1206,8 @@ static void lan9118_16bit_mode_write(void *opaque, hwaddr 
offset,
 return;
 }
 
-hw_error("lan9118_write: Bad size 0x%x\n", size);
+qemu_log_mask(LOG_GUEST_ERROR,
+  "lan9118_16bit_mode_write: Bad size 0x%x\n", size);
 }
 
 static uint64_t lan9118_readl(void *opaque, hwaddr offset,
@@ -1324,7 +1322,8 @@ static uint64_t lan9118_16bit_mode_read(void *opaque, 
hwaddr offset,
 return lan9118_readl(opaque, offset, size);
 }
 
-hw_error("lan9118_read: Bad size 0x%x\n", size);
+qemu_log_mask(LOG_GUEST_ERROR,
+  "lan9118_16bit_mode_read: Bad size 0x%x\n", size);
 return 0;
 }
 
-- 
2.25.1

Re: [PATCH v2 0/2] target/riscv/cpu: fix sifive_u 32/64bits boot in riscv-to-apply.next

2023-01-15 Thread Alistair Francis

On Sat, Jan 14, 2023 at 3:55 AM Daniel Henrique Barboza
 wrote:
>
> Hi,
>
> In this version I fixed the commit message typos pointed out by Bin. I've
> also added some notes about the code repetition the fix is introducing
> in the cpu_init() functions.
>
> The patches are based on riscv-to-apply.next at c1e76da3e668
> ("target/riscv/cpu.c: Fix elen check").
>
> Changes from v1:
> - patch 1:
>   - fixed commit message typos
> v1 review: https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02035.html
>
> Daniel Henrique Barboza (2):
>   target/riscv/cpu: set cpu->cfg in register_cpu_props()
>   target/riscv/cpu.c: do not skip misa logic in riscv_cpu_realize()

Thanks!

Applied to riscv-to-apply.next

Alistair

>
>  target/riscv/cpu.c | 439 +
>  target/riscv/cpu.h |   4 +
>  2 files changed, 249 insertions(+), 194 deletions(-)
>
> --
> 2.39.0
>
>

Re: [RFC v2 02/13] vdpa net: move iova tree creation from init to start

2023-01-15 Thread Jason Wang




在 2023/1/13 15:28, Eugenio Perez Martin 写道:

On Fri, Jan 13, 2023 at 4:53 AM Jason Wang  wrote:

On Fri, Jan 13, 2023 at 1:24 AM Eugenio Pérez  wrote:

Only create iova_tree if and when it is needed.

The cleanup remains the responsibility of the last VQ, but this change allows
merging both cleanup functions.

Signed-off-by: Eugenio Pérez 
---
  net/vhost-vdpa.c | 101 +--
  1 file changed, 71 insertions(+), 30 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index de5ed8ff22..75cca497c8 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -178,13 +178,9 @@ err_init:
  static void vhost_vdpa_cleanup(NetClientState *nc)
  {
  VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
-struct vhost_dev *dev = >vhost_net->dev;

  qemu_vfree(s->cvq_cmd_out_buffer);
  qemu_vfree(s->status);
-if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
-g_clear_pointer(>vhost_vdpa.iova_tree, vhost_iova_tree_delete);
-}
  if (s->vhost_net) {
  vhost_net_cleanup(s->vhost_net);
  g_free(s->vhost_net);
@@ -234,10 +230,64 @@ static ssize_t vhost_vdpa_receive(NetClientState *nc, 
const uint8_t *buf,
  return size;
  }

+/** From any vdpa net client, get the netclient of first queue pair */
+static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
+{
+NICState *nic = qemu_get_nic(s->nc.peer);
+NetClientState *nc0 = qemu_get_peer(nic->ncs, 0);
+
+return DO_UPCAST(VhostVDPAState, nc, nc0);
+}
+
+static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
+{
+struct vhost_vdpa *v = >vhost_vdpa;
+
+if (v->shadow_vqs_enabled) {
+v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
+   v->iova_range.last);
+}
+}
+
+static int vhost_vdpa_net_data_start(NetClientState *nc)
+{
+VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+struct vhost_vdpa *v = >vhost_vdpa;
+
+assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+if (v->index == 0) {
+vhost_vdpa_net_data_start_first(s);
+return 0;
+}
+
+if (v->shadow_vqs_enabled) {
+VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s);
+v->iova_tree = s0->vhost_vdpa.iova_tree;
+}

It looks to me like the logic here is somewhat the same as
vhost_vdpa_net_cvq_start(); can we unify them?


It depends on what you mean by unify :). But we can explore it for sure.

We can call vhost_vdpa_net_data_start, but the steps to do if
s0->vhost_vdpa.iova_tree == NULL are different. Data queues must do
nothing, but CVQ must create a new iova tree.

So one possibility is to convert this part of vhost_vdpa_net_cvq_start:
 s0 = vhost_vdpa_net_first_nc_vdpa(s);
 if (s0->vhost_vdpa.iova_tree) {
 /* SVQ is already configured for all virtqueues */
 v->iova_tree = s0->vhost_vdpa.iova_tree;
 } else {
 v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
v->iova_range.last);
 }

into:
 vhost_vdpa_net_data_start(nc);
 if (!v->iova_tree) {
 v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
v->iova_range.last);
 }

I'm ok with the change but it's less clear in my opinion: it's not
obvious that net_data_start is in charge of setting v->iova_tree to
me.



Ok.




Another possibility is to abstract something like
first_nc_iova_tree(), but we need to check more fields of s0 later
(shadow_data) so I'm not sure about the benefit.

Is that what you have in mind?



Kind of, but I think we can leave the code as is.

In the future, as discussed, we need to introduce something like a
parent or opaque structure for the NetClientState structure; it can simplify a
lot of things: we can have one common parent for all queues, and then
there's no need for tricks like first_nc_iova_tree() and other
similar tricks.


Thanks



Thanks!


+
+return 0;
+}
+
+static void vhost_vdpa_net_client_stop(NetClientState *nc)
+{
+VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+struct vhost_dev *dev;
+
+assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+dev = s->vhost_vdpa.dev;
+if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
+g_clear_pointer(>vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+}
+}
+
  static NetClientInfo net_vhost_vdpa_info = {
  .type = NET_CLIENT_DRIVER_VHOST_VDPA,
  .size = sizeof(VhostVDPAState),
  .receive = vhost_vdpa_receive,
+.start = vhost_vdpa_net_data_start,
+.stop = vhost_vdpa_net_client_stop,
  .cleanup = vhost_vdpa_cleanup,
  .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
  .has_ufo = vhost_vdpa_has_ufo,
@@ -351,7 +401,7 @@ dma_map_err:

  static int vhost_vdpa_net_cvq_start(NetClientState *nc)
  {
-VhostVDPAState *s;
+VhostVDPAState *s, *s0;
  struct vhost_vdpa *v;

Re: [RFC v2 01/13] vdpa: fix VHOST_BACKEND_F_IOTLB_ASID flag check

2023-01-15 Thread Jason Wang




在 2023/1/13 14:42, Eugenio Perez Martin 写道:

On Fri, Jan 13, 2023 at 4:12 AM Jason Wang  wrote:

On Fri, Jan 13, 2023 at 1:24 AM Eugenio Pérez  wrote:

VHOST_BACKEND_F_IOTLB_ASID is the feature bit, not the bitmask. Since
the device under test also provided VHOST_BACKEND_F_IOTLB_MSG_V2 and
VHOST_BACKEND_F_IOTLB_BATCH, this went unnoticed.

Fixes: c1a1008685 ("vdpa: always start CVQ in SVQ mode if possible")
Signed-off-by: Eugenio Pérez 

Acked-by: Jason Wang 

Do we need this for -stable?


The commit c1a1008685 was introduced in this development window so
there is no stable version of qemu with that patch. But I'm ok to CC
stable just in case for sure.



Right, I just had a check and it isn't there for 7.2, so there should be
no need for that.


Thanks




Thanks!


Thanks


---
  net/vhost-vdpa.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 1a13a34d35..de5ed8ff22 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -384,7 +384,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
  g_strerror(errno), errno);
  return -1;
  }
-if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) ||
+if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
  !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
  return 0;
  }
--
2.31.1

Re: [PATCH v7 2/3] hw/riscv/boot.c: make riscv_load_initrd() static

2023-01-15 Thread Alistair Francis

On Sat, Jan 14, 2023 at 3:39 AM Daniel Henrique Barboza
 wrote:
>
> The only remaining caller is riscv_load_kernel_and_initrd() which
> belongs to the same file.
>
> Signed-off-by: Daniel Henrique Barboza 
> Reviewed-by: Philippe Mathieu-Daudé 
> Reviewed-by: Bin Meng 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/boot.c | 80 -
>  include/hw/riscv/boot.h |  1 -
>  2 files changed, 40 insertions(+), 41 deletions(-)
>
> diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
> index 4888d5c1e0..e868fb6ade 100644
> --- a/hw/riscv/boot.c
> +++ b/hw/riscv/boot.c
> @@ -173,6 +173,46 @@ target_ulong riscv_load_firmware(const char 
> *firmware_filename,
>  exit(1);
>  }
>
> +static void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry)
> +{
> +const char *filename = machine->initrd_filename;
> +uint64_t mem_size = machine->ram_size;
> +void *fdt = machine->fdt;
> +hwaddr start, end;
> +ssize_t size;
> +
> +g_assert(filename != NULL);
> +
> +/*
> + * We want to put the initrd far enough into RAM that when the
> + * kernel is uncompressed it will not clobber the initrd. However
> + * on boards without much RAM we must ensure that we still leave
> + * enough room for a decent sized initrd, and on boards with large
> + * amounts of RAM we must avoid the initrd being so far up in RAM
> + * that it is outside lowmem and inaccessible to the kernel.
> + * So for boards with less  than 256MB of RAM we put the initrd
> + * halfway into RAM, and for boards with 256MB of RAM or more we put
> + * the initrd at 128MB.
> + */
> +start = kernel_entry + MIN(mem_size / 2, 128 * MiB);
> +
> +size = load_ramdisk(filename, start, mem_size - start);
> +if (size == -1) {
> +size = load_image_targphys(filename, start, mem_size - start);
> +if (size == -1) {
> +error_report("could not load ramdisk '%s'", filename);
> +exit(1);
> +}
> +}
> +
> +/* Some RISC-V machines (e.g. opentitan) don't have a fdt. */
> +if (fdt) {
> +end = start + size;
> +qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", start);
> +qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", end);
> +}
> +}
> +
>  target_ulong riscv_load_kernel(MachineState *machine,
> target_ulong kernel_start_addr,
> bool load_initrd,
> @@ -225,46 +265,6 @@ out:
>  return kernel_entry;
>  }
>
> -void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry)
> -{
> -const char *filename = machine->initrd_filename;
> -uint64_t mem_size = machine->ram_size;
> -void *fdt = machine->fdt;
> -hwaddr start, end;
> -ssize_t size;
> -
> -g_assert(filename != NULL);
> -
> -/*
> - * We want to put the initrd far enough into RAM that when the
> - * kernel is uncompressed it will not clobber the initrd. However
> - * on boards without much RAM we must ensure that we still leave
> - * enough room for a decent sized initrd, and on boards with large
> - * amounts of RAM we must avoid the initrd being so far up in RAM
> - * that it is outside lowmem and inaccessible to the kernel.
> - * So for boards with less  than 256MB of RAM we put the initrd
> - * halfway into RAM, and for boards with 256MB of RAM or more we put
> - * the initrd at 128MB.
> - */
> -start = kernel_entry + MIN(mem_size / 2, 128 * MiB);
> -
> -size = load_ramdisk(filename, start, mem_size - start);
> -if (size == -1) {
> -size = load_image_targphys(filename, start, mem_size - start);
> -if (size == -1) {
> -error_report("could not load ramdisk '%s'", filename);
> -exit(1);
> -}
> -}
> -
> -/* Some RISC-V machines (e.g. opentitan) don't have a fdt. */
> -if (fdt) {
> -end = start + size;
> -qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", start);
> -qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", end);
> -}
> -}
> -
>  uint64_t riscv_load_fdt(hwaddr dram_base, uint64_t mem_size, void *fdt)
>  {
>  uint64_t temp, fdt_addr;
> diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h
> index c3de897371..cbd131bad7 100644
> --- a/include/hw/riscv/boot.h
> +++ b/include/hw/riscv/boot.h
> @@ -47,7 +47,6 @@ target_ulong riscv_load_kernel(MachineState *machine,
> target_ulong firmware_end_addr,
> bool load_initrd,
> symbol_fn_t sym_cb);
> -void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry);
>  uint64_t riscv_load_fdt(hwaddr dram_start, uint64_t dram_size, void *fdt);
>  void riscv_setup_rom_reset_vec(MachineState *machine, RISCVHartArrayState 
> *harts,
> hwaddr saddr,
> --
> 2.39.0
>
>

Re: [PATCH 4/4] hw: Replace qemu_or_irq typedef by OrIRQState

2023-01-15 Thread Alistair Francis

On Sat, Jan 14, 2023 at 6:04 AM Philippe Mathieu-Daudé
 wrote:
>
> OBJECT_DECLARE_SIMPLE_TYPE() macro provides the OrIRQState
> declaration for free. Besides, the QOM code style is to use
> the structure name as typedef, and QEMU style is to use Camel
> Case, so rename qemu_or_irq as OrIRQState.
>
> Mechanical change using:
>
>   $ sed -i -e 's/qemu_or_irq/OrIRQState/g' $(git grep -l qemu_or_irq)
>
> Signed-off-by: Philippe Mathieu-Daudé 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/arm/exynos4210.c  |  4 ++--
>  hw/arm/mps2-tz.c |  2 +-
>  hw/core/or-irq.c | 18 +-
>  hw/pci-host/raven.c  |  2 +-
>  include/hw/arm/armsse.h  |  6 +++---
>  include/hw/arm/bcm2835_peripherals.h |  2 +-
>  include/hw/arm/exynos4210.h  |  4 ++--
>  include/hw/arm/stm32f205_soc.h   |  2 +-
>  include/hw/arm/stm32f405_soc.h   |  2 +-
>  include/hw/arm/xlnx-versal.h |  6 +++---
>  include/hw/arm/xlnx-zynqmp.h |  2 +-
>  include/hw/or-irq.h  |  2 --
>  12 files changed, 25 insertions(+), 27 deletions(-)
>
> diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c
> index 8dafa2215b..6f2dda13f6 100644
> --- a/hw/arm/exynos4210.c
> +++ b/hw/arm/exynos4210.c
> @@ -507,7 +507,7 @@ static uint64_t exynos4210_calc_affinity(int cpu)
>  return (0x9 << ARM_AFF1_SHIFT) | cpu;
>  }
>
> -static DeviceState *pl330_create(uint32_t base, qemu_or_irq *orgate,
> +static DeviceState *pl330_create(uint32_t base, OrIRQState *orgate,
>   qemu_irq irq, int nreq, int nevents, int 
> width)
>  {
>  SysBusDevice *busdev;
> @@ -806,7 +806,7 @@ static void exynos4210_init(Object *obj)
>
>  for (i = 0; i < ARRAY_SIZE(s->pl330_irq_orgate); i++) {
>  char *name = g_strdup_printf("pl330-irq-orgate%d", i);
> -qemu_or_irq *orgate = &s->pl330_irq_orgate[i];
> +OrIRQState *orgate = &s->pl330_irq_orgate[i];
>
>  object_initialize_child(obj, name, orgate, TYPE_OR_IRQ);
>  g_free(name);
> diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c
> index 284c09c91d..07aecd9497 100644
> --- a/hw/arm/mps2-tz.c
> +++ b/hw/arm/mps2-tz.c
> @@ -152,7 +152,7 @@ struct MPS2TZMachineState {
>  TZMSC msc[4];
>  CMSDKAPBUART uart[6];
>  SplitIRQ sec_resp_splitter;
> -qemu_or_irq uart_irq_orgate;
> +OrIRQState uart_irq_orgate;
>  DeviceState *lan9118;
>  SplitIRQ cpu_irq_splitter[MPS2TZ_NUMIRQ_MAX];
>  Clock *sysclk;
> diff --git a/hw/core/or-irq.c b/hw/core/or-irq.c
> index d8f3754e96..1df4bc05a7 100644
> --- a/hw/core/or-irq.c
> +++ b/hw/core/or-irq.c
> @@ -31,7 +31,7 @@
>
>  static void or_irq_handler(void *opaque, int n, int level)
>  {
> -qemu_or_irq *s = OR_IRQ(opaque);
> +OrIRQState *s = OR_IRQ(opaque);
>  int or_level = 0;
>  int i;
>
> @@ -46,7 +46,7 @@ static void or_irq_handler(void *opaque, int n, int level)
>
>  static void or_irq_reset(DeviceState *dev)
>  {
> -qemu_or_irq *s = OR_IRQ(dev);
> +OrIRQState *s = OR_IRQ(dev);
>  int i;
>
>  for (i = 0; i < MAX_OR_LINES; i++) {
> @@ -56,7 +56,7 @@ static void or_irq_reset(DeviceState *dev)
>
>  static void or_irq_realize(DeviceState *dev, Error **errp)
>  {
> -qemu_or_irq *s = OR_IRQ(dev);
> +OrIRQState *s = OR_IRQ(dev);
>
>  assert(s->num_lines <= MAX_OR_LINES);
>
> @@ -65,7 +65,7 @@ static void or_irq_realize(DeviceState *dev, Error **errp)
>
>  static void or_irq_init(Object *obj)
>  {
> -qemu_or_irq *s = OR_IRQ(obj);
> +OrIRQState *s = OR_IRQ(obj);
>
>  qdev_init_gpio_out(DEVICE(obj), &s->out_irq, 1);
>  }
> @@ -84,7 +84,7 @@ static void or_irq_init(Object *obj)
>
>  static bool vmstate_extras_needed(void *opaque)
>  {
> -qemu_or_irq *s = OR_IRQ(opaque);
> +OrIRQState *s = OR_IRQ(opaque);
>
>  return s->num_lines >= OLD_MAX_OR_LINES;
>  }
> @@ -95,7 +95,7 @@ static const VMStateDescription vmstate_or_irq_extras = {
>  .minimum_version_id = 1,
>  .needed = vmstate_extras_needed,
>  .fields = (VMStateField[]) {
> -VMSTATE_VARRAY_UINT16_UNSAFE(levels, qemu_or_irq, num_lines, 0,
> +VMSTATE_VARRAY_UINT16_UNSAFE(levels, OrIRQState, num_lines, 0,
>   vmstate_info_bool, bool),
>  VMSTATE_END_OF_LIST(),
>  },
> @@ -106,7 +106,7 @@ static const VMStateDescription vmstate_or_irq = {
>  .version_id = 1,
>  .minimum_version_id = 1,
>  .fields = (VMStateField[]) {
> -VMSTATE_BOOL_SUB_ARRAY(levels, qemu_or_irq, 0, OLD_MAX_OR_LINES),
> +VMSTATE_BOOL_SUB_ARRAY(levels, OrIRQState, 0, OLD_MAX_OR_LINES),
>  VMSTATE_END_OF_LIST(),
>  },
>  .subsections = (const VMStateDescription*[]) {
> @@ -116,7 +116,7 @@ static const VMStateDescription vmstate_or_irq = {
>  };
>
>  static Property or_irq_properties[] = {
> -DEFINE_PROP_UINT16("num-lines", qemu_or_irq, num_lines, 1),
> +

Re: [PATCH 3/4] hw/or-irq: Declare QOM macros using OBJECT_DECLARE_SIMPLE_TYPE()

2023-01-15 Thread Alistair Francis

On Sat, Jan 14, 2023 at 6:03 AM Philippe Mathieu-Daudé
 wrote:
>
> Missed during automatic conversion from commit 8063396bf3
> ("Use OBJECT_DECLARE_SIMPLE_TYPE when possible").
>
> Signed-off-by: Philippe Mathieu-Daudé 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  include/hw/or-irq.h | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/include/hw/or-irq.h b/include/hw/or-irq.h
> index f2f0a27381..131abc2e0c 100644
> --- a/include/hw/or-irq.h
> +++ b/include/hw/or-irq.h
> @@ -37,8 +37,7 @@
>
>  typedef struct OrIRQState qemu_or_irq;
>
> -DECLARE_INSTANCE_CHECKER(qemu_or_irq, OR_IRQ,
> - TYPE_OR_IRQ)
> +OBJECT_DECLARE_SIMPLE_TYPE(OrIRQState, OR_IRQ)
>
>  struct OrIRQState {
>  DeviceState parent_obj;
> --
> 2.38.1
>
>

Re: [PATCH 2/4] hw/irq: Declare QOM macros using OBJECT_DECLARE_SIMPLE_TYPE()

2023-01-15 Thread Alistair Francis

On Sat, Jan 14, 2023 at 6:03 AM Philippe Mathieu-Daudé
 wrote:
>
> QOM *DECLARE* macros expect a typedef as first argument,
> not a structure. Replace 'struct IRQState' by 'IRQState'
> to avoid the following error when modifying the macros:
>
>   ../hw/core/irq.c:29:1: error: declaration of anonymous struct must be a 
> definition
>   DECLARE_INSTANCE_CHECKER(struct IRQState, IRQ,
>   ^
>
> Use OBJECT_DECLARE_SIMPLE_TYPE instead of DECLARE_INSTANCE_CHECKER.
>
> Signed-off-by: Philippe Mathieu-Daudé 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/core/irq.c | 9 -
>  1 file changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/hw/core/irq.c b/hw/core/irq.c
> index 3623f711fe..3f14e2dda7 100644
> --- a/hw/core/irq.c
> +++ b/hw/core/irq.c
> @@ -26,8 +26,7 @@
>  #include "hw/irq.h"
>  #include "qom/object.h"
>
> -DECLARE_INSTANCE_CHECKER(struct IRQState, IRQ,
> - TYPE_IRQ)
> +OBJECT_DECLARE_SIMPLE_TYPE(IRQState, IRQ)
>
>  struct IRQState {
>  Object parent_obj;
> @@ -68,7 +67,7 @@ qemu_irq *qemu_allocate_irqs(qemu_irq_handler handler, void 
> *opaque, int n)
>
>  qemu_irq qemu_allocate_irq(qemu_irq_handler handler, void *opaque, int n)
>  {
> -struct IRQState *irq;
> +IRQState *irq;
>
>  irq = IRQ(object_new(TYPE_IRQ));
>  irq->handler = handler;
> @@ -94,7 +93,7 @@ void qemu_free_irq(qemu_irq irq)
>
>  static void qemu_notirq(void *opaque, int line, int level)
>  {
> -struct IRQState *irq = opaque;
> +IRQState *irq = opaque;
>
>  irq->handler(irq->opaque, irq->n, !level);
>  }
> @@ -120,7 +119,7 @@ void qemu_irq_intercept_in(qemu_irq *gpio_in, 
> qemu_irq_handler handler, int n)
>  static const TypeInfo irq_type_info = {
> .name = TYPE_IRQ,
> .parent = TYPE_OBJECT,
> -   .instance_size = sizeof(struct IRQState),
> +   .instance_size = sizeof(IRQState),
>  };
>
>  static void irq_register_types(void)
> --
> 2.38.1
>
>

Re: [PATCH 1/4] iothread: Remove unused IOThreadClass / IOTHREAD_CLASS

2023-01-15 Thread Alistair Francis

On Sat, Jan 14, 2023 at 6:04 AM Philippe Mathieu-Daudé
 wrote:
>
> Since commit be8d853766 ("iothread: add I/O thread object") we
> never used IOThreadClass / IOTHREAD_CLASS() / IOTHREAD_GET_CLASS(),
> remove these definitions.
>
> Signed-off-by: Philippe Mathieu-Daudé 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  iothread.c | 4 
>  1 file changed, 4 deletions(-)
>
> diff --git a/iothread.c b/iothread.c
> index 3862a64471..b41c305bd9 100644
> --- a/iothread.c
> +++ b/iothread.c
> @@ -25,10 +25,6 @@
>  #include "qemu/rcu.h"
>  #include "qemu/main-loop.h"
>
> -typedef ObjectClass IOThreadClass;
> -
> -DECLARE_CLASS_CHECKERS(IOThreadClass, IOTHREAD,
> -   TYPE_IOTHREAD)
>
>  #ifdef CONFIG_POSIX
>  /* Benchmark results from 2016 on NVMe SSD drives show max polling times 
> around
> --
> 2.38.1
>
>

[PATCH v2] Fix exec migration on Windows (w32+w64).

2023-01-15 Thread John Berberian, Jr

* Use cmd instead of /bin/sh on Windows.

* Try to auto-detect cmd.exe's path, but default to a hard-coded path.

Note that this will require that gspawn-win[32|64]-helper.exe and
gspawn-win[32|64]-helper-console.exe are included in the Windows binary
distributions (cc: Stefan Weil).

Signed-off-by: "John Berberian, Jr" 
---
Whoops, forgot a header. Here's a revised patch.

 migration/exec.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/migration/exec.c b/migration/exec.c
index 375d2e1b54..38604d73a6 100644
--- a/migration/exec.c
+++ b/migration/exec.c
@@ -23,12 +23,31 @@
 #include "migration.h"
 #include "io/channel-command.h"
 #include "trace.h"
+#include "qemu/cutils.h"
 
+#ifdef WIN32
+const char *exec_get_cmd_path(void);
+const char *exec_get_cmd_path(void)
+{
+g_autofree char *detected_path = g_new(char, MAX_PATH);
+if (GetSystemDirectoryA(detected_path, MAX_PATH) == 0) {
+warn_report("Could not detect cmd.exe path, using default.");
+return "C:\\Windows\\System32\\cmd.exe";
+}
+pstrcat(detected_path, MAX_PATH, "\\cmd.exe");
+return g_steal_pointer(&detected_path);
+}
+#endif
 
 void exec_start_outgoing_migration(MigrationState *s, const char *command, 
Error **errp)
 {
 QIOChannel *ioc;
+
+#ifdef WIN32
+const char *argv[] = { exec_get_cmd_path(), "/c", command, NULL };
+#else
 const char *argv[] = { "/bin/sh", "-c", command, NULL };
+#endif
 
 trace_migration_exec_outgoing(command);
 ioc = QIO_CHANNEL(qio_channel_command_new_spawn(argv,
@@ -55,7 +74,12 @@ static gboolean exec_accept_incoming_migration(QIOChannel 
*ioc,
 void exec_start_incoming_migration(const char *command, Error **errp)
 {
 QIOChannel *ioc;
+
+#ifdef WIN32
+const char *argv[] = { exec_get_cmd_path(), "/c", command, NULL };
+#else
 const char *argv[] = { "/bin/sh", "-c", command, NULL };
+#endif
 
 trace_migration_exec_incoming(command);
 ioc = QIO_CHANNEL(qio_channel_command_new_spawn(argv,
-- 
2.39.0

[PATCH] pci: add enforce_slot_reserved_mask_manual property

2023-01-15 Thread Chuck Zmudzinski

The current reserved slot check in do_pci_register_device(), added with
commit 8b8849844fd6, is done even if the pci device being added is
configured manually for a particular slot. The new property, when set
to false, disables the check when the device is configured to request a
particular slot. This allows an administrator or management tool to
override slot_reserved_mask for a pci device by requesting a particular
slot for the device. The new property is initialized to true which
preserves the existing behavior of slot_reserved_mask by default.

Signed-off-by: Chuck Zmudzinski 
---
 hw/pci/pci.c | 9 -
 include/hw/pci/pci_bus.h | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index c2fb88f9a3..5e15f08036 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -467,6 +467,7 @@ static void pci_root_bus_internal_init(PCIBus *bus, 
DeviceState *parent,
 assert(PCI_FUNC(devfn_min) == 0);
 bus->devfn_min = devfn_min;
 bus->slot_reserved_mask = 0x0;
+bus->enforce_slot_reserved_mask_manual = true;
 bus->address_space_mem = address_space_mem;
 bus->address_space_io = address_space_io;
 bus->flags |= PCI_BUS_IS_ROOT;
@@ -1074,6 +1075,12 @@ static bool pci_bus_devfn_reserved(PCIBus *bus, int 
devfn)
 return bus->slot_reserved_mask & (1UL << PCI_SLOT(devfn));
 }
 
+static bool pci_bus_devfn_reserved_manual(PCIBus *bus, int devfn)
+{
+return bus->enforce_slot_reserved_mask_manual &&
+(bus->slot_reserved_mask & (1UL << PCI_SLOT(devfn)));
+}
+
 /* -1 for devfn means auto assign */
 static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
  const char *name, int devfn,
@@ -1107,7 +1114,7 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev,
"or reserved", name);
 return NULL;
 found: ;
-} else if (pci_bus_devfn_reserved(bus, devfn)) {
+} else if (pci_bus_devfn_reserved_manual(bus, devfn)) {
 error_setg(errp, "PCI: slot %d function %d not available for %s,"
" reserved",
PCI_SLOT(devfn), PCI_FUNC(devfn), name);
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 5653175957..e0f15ee9be 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -37,6 +37,7 @@ struct PCIBus {
 void *iommu_opaque;
 uint8_t devfn_min;
 uint32_t slot_reserved_mask;
+bool enforce_slot_reserved_mask_manual;
 pci_set_irq_fn set_irq;
 pci_map_irq_fn map_irq;
 pci_route_irq_fn route_intx_to_irq;
-- 
2.39.0

Re: [PATCH 0/3] Fix UNMAP notifier for intel-iommu

2023-01-15 Thread Viktor Prutyanov

On Tue, Nov 29, 2022 at 11:10 AM Jason Wang  wrote:
>
> Hi All:
>
> According to ATS, device should work if ATS is disabled. This is not
> correctly implemented in the current intel-iommu since it doesn't
> handle the UNMAP notifier correctly. This breaks the vhost-net +
> vIOMMU without dt.
>
> The root cause is that when there's a device IOTLB miss (note that
> it's not specific to PCI so it can work without ATS), Qemu doesn't
> build the IOVA tree, so when the guest starts an IOTLB invalidation, Qemu
> won't trigger the UNMAP notifier.
>
> Fix this by building the IOVA tree during IOMMU translation.
>
> Thanks
>
> Jason Wang (3):
>   intel-iommu: fail MAP notifier without caching mode
>   intel-iommu: fail DEVIOTLB_UNMAP without dt mode
>   intel-iommu: build iova tree during IOMMU translation
>
>  hw/i386/intel_iommu.c | 58 ---
>  1 file changed, 33 insertions(+), 25 deletions(-)
>
> --
> 2.25.1
>

Hi Jason,

I've tried the series with a Windows Server 2022 guest with vhost and
intel-iommu (device-iotlb=off), and networking on this system now
works.
So, as we discussed, I'm waiting for the series to be accepted in some
form to continue my work on supporting guests that refuse Device-TLB
on systems with device-iotlb=on.

Tested-by: Viktor Prutyanov 

Best regards,
Viktor Prutyanov

[PATCH] Fix exec migration on Windows (w32+w64).

2023-01-15 Thread John Berberian, Jr

* Use cmd instead of /bin/sh on Windows.

* Try to auto-detect cmd.exe's path, but default to a hard-coded path.

Note that this will require that gspawn-win32-helper.exe and
gspawn-win32-helper-console.exe are included in the Windows binary
distributions (cc: Stefan Weil).

Signed-off-by: John Berberian, Jr 
---
 migration/exec.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/migration/exec.c b/migration/exec.c
index 375d2e1b54..f65e7db7df 100644
--- a/migration/exec.c
+++ b/migration/exec.c
@@ -24,11 +24,29 @@
 #include "io/channel-command.h"
 #include "trace.h"
 
+#ifdef WIN32
+const char *exec_get_cmd_path(void);
+const char *exec_get_cmd_path(void)
+{
+g_autofree char *detected_path = g_new(char, MAX_PATH);
+if (GetSystemDirectoryA(detected_path, MAX_PATH) == 0) {
+warn_report("Could not detect cmd.exe path, using default.");
+return "C:\\Windows\\System32\\cmd.exe";
+}
+pstrcat(detected_path, MAX_PATH, "\\cmd.exe");
+return g_steal_pointer(&detected_path);
+}
+#endif
 
 void exec_start_outgoing_migration(MigrationState *s, const char *command, 
Error **errp)
 {
 QIOChannel *ioc;
+
+#ifdef WIN32
+const char *argv[] = { exec_get_cmd_path(), "/c", command, NULL };
+#else
 const char *argv[] = { "/bin/sh", "-c", command, NULL };
+#endif
 
 trace_migration_exec_outgoing(command);
 ioc = QIO_CHANNEL(qio_channel_command_new_spawn(argv,
@@ -55,7 +73,12 @@ static gboolean exec_accept_incoming_migration(QIOChannel 
*ioc,
 void exec_start_incoming_migration(const char *command, Error **errp)
 {
 QIOChannel *ioc;
+
+#ifdef WIN32
+const char *argv[] = { exec_get_cmd_path(), "/c", command, NULL };
+#else
 const char *argv[] = { "/bin/sh", "-c", command, NULL };
+#endif
 
 trace_migration_exec_incoming(command);
 ioc = QIO_CHANNEL(qio_channel_command_new_spawn(argv,
-- 
2.39.0

Re: [PATCH v3 1/3] arm: move KVM breakpoints helpers

2023-01-15 Thread Alex Bennée



francesco.cag...@gmail.com writes:

> From: Francesco Cagnin 
>
> These helpers will also be used for HVF. Aside from reformatting a
> couple of comments for 'checkpatch.pl' and updating meson to compile
> 'hyp_gdbstub.c', this is just code motion.
>
> Signed-off-by: Francesco Cagnin 


Haven't I reviewed this already? Anyway:

Reviewed-by: Alex Bennée 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

[PATCH] linux-user: Improve strace output of getgroups() and setgroups()

2023-01-15 Thread Helge Deller

Make the strace look nicer for those syscalls.

Signed-off-by: Helge Deller 
---
 linux-user/strace.list | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/linux-user/strace.list b/linux-user/strace.list
index 3924046426..7c1124a718 100644
--- a/linux-user/strace.list
+++ b/linux-user/strace.list
@@ -321,10 +321,10 @@
 { TARGET_NR_getgid32, "getgid32" , NULL, NULL, NULL },
 #endif
 #ifdef TARGET_NR_getgroups
-{ TARGET_NR_getgroups, "getgroups" , NULL, NULL, NULL },
+{ TARGET_NR_getgroups, "getgroups" , "%s(%d,%p)", NULL, NULL },
 #endif
 #ifdef TARGET_NR_getgroups32
-{ TARGET_NR_getgroups32, "getgroups32" , NULL, NULL, NULL },
+{ TARGET_NR_getgroups32, "getgroups32" , "%s(%d,%p)", NULL, NULL },
 #endif
 #ifdef TARGET_NR_gethostname
 { TARGET_NR_gethostname, "gethostname" , NULL, NULL, NULL },
@@ -1308,10 +1308,10 @@
 { TARGET_NR_setgid32, "setgid32" , "%s(%u)", NULL, NULL },
 #endif
 #ifdef TARGET_NR_setgroups
-{ TARGET_NR_setgroups, "setgroups" , NULL, NULL, NULL },
+{ TARGET_NR_setgroups, "setgroups" , "%s(%d,%p)", NULL, NULL },
 #endif
 #ifdef TARGET_NR_setgroups32
-{ TARGET_NR_setgroups32, "setgroups32" , NULL, NULL, NULL },
+{ TARGET_NR_setgroups32, "setgroups32" , "%s(%d,%p)", NULL, NULL },
 #endif
 #ifdef TARGET_NR_sethae
 { TARGET_NR_sethae, "sethae" , NULL, NULL, NULL },
--
2.38.1

Re: [PATCH 7/7] hw/mem/cxl_type3: Add CXL RAS Error Injection Support.

2023-01-15 Thread Mike Maslenkin

On Fri, Jan 13, 2023 at 7:43 PM Jonathan Cameron via
 wrote:
>
> CXL uses PCI AER Internal errors to signal to the host that an error has
> occurred. The host can then read more detailed status from the CXL RAS
> capability.
>
> For uncorrectable errors: support multiple injection in one operation
> as this is needed to reliably test multiple header logging support in an
> OS. The equivalent feature doesn't exist for correctable errors, so only
> one error need be injected at a time.
>
> Note:
>  - Header content needs to be manually specified in a fashion that
>matches the specification for what can be in the header for each
>error type.
>
> Injection via QMP:
> { "execute": "qmp_capabilities" }
> ...
> { "execute": "cxl-inject-uncorrectable-errors",
>   "arguments": {
> "path": "/machine/peripheral/cxl-pmem0",
> "errors": [
> {
> "type": "cache-address-parity",
> "header": [ 3, 4]
> },
> {
> "type": "cache-data-parity",
> "header": 
> [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
> },
> {
> "type": "internal",
> "header": [ 1, 2, 4]
> }
> ]
>   }}
> ...
> { "execute": "cxl-inject-correctable-error",
> "arguments": {
> "path": "/machine/peripheral/cxl-pmem0",
> "type": "physical",
> "header": [ 3, 4]
> } }
>
> Signed-off-by: Jonathan Cameron 
> ---
>  hw/cxl/cxl-component-utils.c   |   4 +-
>  hw/mem/cxl_type3.c | 290 +
>  hw/mem/cxl_type3_stubs.c   |  10 ++
>  hw/mem/meson.build |   2 +
>  include/hw/cxl/cxl_component.h |  26 +++
>  include/hw/cxl/cxl_device.h|  11 ++
>  qapi/cxl.json  | 113 +
>  qapi/meson.build   |   1 +
>  qapi/qapi-schema.json  |   1 +
>  9 files changed, 457 insertions(+), 1 deletion(-)
>
> diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> index 3edd303a33..02fb6c17b9 100644
> --- a/hw/cxl/cxl-component-utils.c
> +++ b/hw/cxl/cxl-component-utils.c
> @@ -142,16 +142,18 @@ static void ras_init_common(uint32_t *reg_state, 
> uint32_t *write_msk)
>   * be handled as RO.
>   */
>  reg_state[R_CXL_RAS_UNC_ERR_STATUS] = 0;
> +write_msk[R_CXL_RAS_UNC_ERR_STATUS] = 0x1cfff;
>  /* Bits 12-13 and 17-31 reserved in CXL 2.0 */
>  reg_state[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff;
>  write_msk[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff;
>  reg_state[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff;
>  write_msk[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff;
>  reg_state[R_CXL_RAS_COR_ERR_STATUS] = 0;
> +write_msk[R_CXL_RAS_COR_ERR_STATUS] = 0x7f;
>  reg_state[R_CXL_RAS_COR_ERR_MASK] = 0x7f;
>  write_msk[R_CXL_RAS_COR_ERR_MASK] = 0x7f;
>  /* CXL switches and devices must set */
> -reg_state[R_CXL_RAS_ERR_CAP_CTRL] = 0x00;
> +reg_state[R_CXL_RAS_ERR_CAP_CTRL] = 0x200;
>  }
>
>  static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 6cdd988d1d..ae8fd09e87 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1,6 +1,7 @@
>  #include "qemu/osdep.h"
>  #include "qemu/units.h"
>  #include "qemu/error-report.h"
> +#include "qapi/qapi-commands-cxl.h"
>  #include "hw/mem/memory-device.h"
>  #include "hw/mem/pc-dimm.h"
>  #include "hw/pci/pci.h"
> @@ -323,6 +324,66 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, int 
> which)
>  ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
>  }
>
> +static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err)
> +{
> +switch (qmp_err) {
> +case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY:
> +return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY;
> +case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY:
> +return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY;
> +case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY:
> +return CXL_RAS_UNC_ERR_CACHE_BE_PARITY;
> +case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC:
> +return CXL_RAS_UNC_ERR_CACHE_DATA_ECC;
> +case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY:
> +return CXL_RAS_UNC_ERR_MEM_DATA_PARITY;
> +case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY:
> +return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY;
> +case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY:
> +return CXL_RAS_UNC_ERR_MEM_BE_PARITY;
> +case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC:
> +return CXL_RAS_UNC_ERR_MEM_DATA_ECC;
> +case CXL_UNCOR_ERROR_TYPE_REINIT_THRESHOLD:
> +return CXL_RAS_UNC_ERR_REINIT_THRESHOLD;
> +case CXL_UNCOR_ERROR_TYPE_RSVD_ENCODING:
> +return CXL_RAS_UNC_ERR_RSVD_ENCODING;
> +case CXL_UNCOR_ERROR_TYPE_POISON_RECEIVED:
> +return CXL_RAS_UNC_ERR_POISON_RECEIVED;
> +case CXL_UNCOR_ERROR_TYPE_RECEIVER_OVERFLOW:
> +return CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW;
> +case

[qemu-web PATCH] Add download procedure in Windows with msys2

2023-01-15 Thread nathbappai

From: Biswapriyo Nath 

Signed-off-by: Biswapriyo Nath 
---
 _download/windows.md | 32 
 1 file changed, 32 insertions(+)

diff --git a/_download/windows.md b/_download/windows.md
index 5ccf8b4..43c9ac7 100644
--- a/_download/windows.md
+++ b/_download/windows.md
@@ -1,3 +1,35 @@
 Stefan Weil provides binaries and installers for
 both [32-bit](https://qemu.weilnetz.de/w32/) and
 [64-bit](https://qemu.weilnetz.de/w64/) Windows.
+
+**MSYS2:**
+
+QEMU can be installed using [MSYS2](https://www.msys2.org/) also. MSYS2 uses
+[pacman](https://wiki.archlinux.org/title/Pacman) to manage packages. First,
+follow the [MSYS2](https://www.msys2.org/) installation procedure. Then update
+the packages with the `pacman -Syu` command. Now choose the proper command for your
+system as follows:
+
+* For 32 bit Windows 7 or above (in MINGW32):
+
+```
+pacman -S mingw-w64-i686-qemu
+```
+
+* For 64 bit Windows 7 or above (in MINGW64):
+
+```
+pacman -S mingw-w64-x86_64-qemu
+```
+
+* For 64 bit Windows 8.1 or above (in UCRT64):
+
+```
+pacman -S mingw-w64-ucrt-x86_64-qemu
+```
+
+Some QEMU related tools can be found in separate packages. Please see the
+MSYS2's [mingw-w64-qemu](https://packages.msys2.org/base/mingw-w64-qemu) page
+for more information. Any QEMU package related issues can be found in
+[MINGW-packages](https://github.com/msys2/MINGW-packages/issues?q=is%3Aissue+is%3Aopen+qemu)
+repository.
-- 
2.39.0

Re: [PATCH v6 09/13] vfio/migration: Implement VFIO migration protocol v2

2023-01-15 Thread Avihai Horon




On 13/01/2023 18:13, Cédric Le Goater wrote:

External email: Use caution opening links or attachments


On 1/12/23 09:50, Avihai Horon wrote:

Implement the basic mandatory part of VFIO migration protocol v2.
This includes all functionality that is necessary to support
VFIO_MIGRATION_STOP_COPY part of the v2 protocol.

The two protocols, v1 and v2, will co-exist and in the following patches
v1 protocol code will be removed.

There are several main differences between v1 and v2 protocols:
- VFIO device state is now represented as a finite state machine instead
   of a bitmap.

- Migration interface with kernel is now done using VFIO_DEVICE_FEATURE
   ioctl and normal read() and write() instead of the migration region.

- Pre-copy is made optional in v2 protocol. Support for pre-copy will be
   added later on.

Detailed information about VFIO migration protocol v2 and its difference
compared to v1 protocol can be found here [1].

[1]
https://lore.kernel.org/all/20220224142024.147653-10-yish...@nvidia.com/

Signed-off-by: Avihai Horon 



LGTM,

Reviewed-by: Cédric Le Goater 

Still a small issue below,


I have sent v7 with a fix for this.

Thanks for reviewing!


---
  include/hw/vfio/vfio-common.h |   5 +
  hw/vfio/common.c  |  19 +-
  hw/vfio/migration.c   | 455 +++---
  hw/vfio/trace-events  |   7 +
  4 files changed, 447 insertions(+), 39 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h 
b/include/hw/vfio/vfio-common.h

index bbaf72ba00..2ec3346fea 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -66,6 +66,11 @@ typedef struct VFIOMigration {
  int vm_running;
  Notifier migration_state;
  uint64_t pending_bytes;
+    enum vfio_device_mig_state device_state;
+    int data_fd;
+    void *data_buffer;
+    size_t data_buffer_size;
+    bool v2;
  } VFIOMigration;

  typedef struct VFIOAddressSpace {
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 550b2d7ded..dcaa77d2a8 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -355,10 +355,18 @@ static bool 
vfio_devices_all_dirty_tracking(VFIOContainer *container)

  return false;
  }

-    if ((vbasedev->pre_copy_dirty_page_tracking == 
ON_OFF_AUTO_OFF) &&

+    if (!migration->v2 &&
+    (vbasedev->pre_copy_dirty_page_tracking == 
ON_OFF_AUTO_OFF) &&
  (migration->device_state_v1 & 
VFIO_DEVICE_STATE_V1_RUNNING)) {

  return false;
  }
+
+    if (migration->v2 &&
+    (vbasedev->pre_copy_dirty_page_tracking == 
ON_OFF_AUTO_OFF) &&
+    (migration->device_state == 
VFIO_DEVICE_STATE_RUNNING ||
+ migration->device_state == 
VFIO_DEVICE_STATE_RUNNING_P2P)) {

+    return false;
+    }
  }
  }
  return true;
@@ -385,7 +393,14 @@ static bool 
vfio_devices_all_running_and_mig_active(VFIOContainer *container)

  return false;
  }

-    if (migration->device_state_v1 & 
VFIO_DEVICE_STATE_V1_RUNNING) {

+    if (!migration->v2 &&
+    migration->device_state_v1 & 
VFIO_DEVICE_STATE_V1_RUNNING) {

+    continue;
+    }
+
+    if (migration->v2 &&
+    (migration->device_state == 
VFIO_DEVICE_STATE_RUNNING ||
+ migration->device_state == 
VFIO_DEVICE_STATE_RUNNING_P2P)) {

  continue;
  } else {
  return false;
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 9df859f4d3..08f53189fa 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -10,6 +10,7 @@
  #include "qemu/osdep.h"
  #include "qemu/main-loop.h"
  #include "qemu/cutils.h"
+#include "qemu/units.h"
  #include 
  #include 

@@ -44,8 +45,103 @@
  #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xef13ULL)
  #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xef14ULL)

+/*
+ * This is an arbitrary size based on migration of mlx5 devices, 
where typically
+ * total device migration size is on the order of 100s of MB. 
Testing with
+ * larger values, e.g. 128MB and 1GB, did not show a performance 
improvement.

+ */
+#define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)
+
  static int64_t bytes_transferred;

+static const char *mig_state_to_str(enum vfio_device_mig_state state)
+{
+    switch (state) {
+    case VFIO_DEVICE_STATE_ERROR:
+    return "ERROR";
+    case VFIO_DEVICE_STATE_STOP:
+    return "STOP";
+    case VFIO_DEVICE_STATE_RUNNING:
+    return "RUNNING";
+    case VFIO_DEVICE_STATE_STOP_COPY:
+    return "STOP_COPY";
+    case VFIO_DEVICE_STATE_RESUMING:
+    return "RESUMING";
+    case VFIO_DEVICE_STATE_RUNNING_P2P:
+    return "RUNNING_P2P";
+    default:
+    return "UNKNOWN STATE";
+    }
+}
+
+static int vfio_migration_set_state(VFIODevice *vbasedev,
+    enum

[PATCH v7 09/13] vfio/migration: Implement VFIO migration protocol v2

2023-01-15 Thread Avihai Horon

Implement the basic mandatory part of VFIO migration protocol v2.
This includes all functionality that is necessary to support
VFIO_MIGRATION_STOP_COPY part of the v2 protocol.

The two protocols, v1 and v2, will co-exist and in the following patches
v1 protocol code will be removed.

There are several main differences between v1 and v2 protocols:
- VFIO device state is now represented as a finite state machine instead
  of a bitmap.

- Migration interface with kernel is now done using VFIO_DEVICE_FEATURE
  ioctl and normal read() and write() instead of the migration region.

- Pre-copy is made optional in v2 protocol. Support for pre-copy will be
  added later on.

Detailed information about VFIO migration protocol v2 and its difference
compared to v1 protocol can be found here [1].

[1]
https://lore.kernel.org/all/20220224142024.147653-10-yish...@nvidia.com/

Signed-off-by: Avihai Horon 
Reviewed-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-common.h |   5 +
 hw/vfio/common.c  |  19 +-
 hw/vfio/migration.c   | 455 +++---
 hw/vfio/trace-events  |   7 +
 4 files changed, 447 insertions(+), 39 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index bbaf72ba00..2ec3346fea 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -66,6 +66,11 @@ typedef struct VFIOMigration {
 int vm_running;
 Notifier migration_state;
 uint64_t pending_bytes;
+enum vfio_device_mig_state device_state;
+int data_fd;
+void *data_buffer;
+size_t data_buffer_size;
+bool v2;
 } VFIOMigration;
 
 typedef struct VFIOAddressSpace {
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 550b2d7ded..dcaa77d2a8 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -355,10 +355,18 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer 
*container)
 return false;
 }
 
-if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
+if (!migration->v2 &&
+(vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
 (migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING)) {
 return false;
 }
+
+if (migration->v2 &&
+(vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
+(migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
+ migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P)) {
+return false;
+}
 }
 }
 return true;
@@ -385,7 +393,14 @@ static bool 
vfio_devices_all_running_and_mig_active(VFIOContainer *container)
 return false;
 }
 
-if (migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING) {
+if (!migration->v2 &&
+migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING) {
+continue;
+}
+
+if (migration->v2 &&
+(migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
+ migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P)) {
 continue;
 } else {
 return false;
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 9df859f4d3..f19ada0f4f 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -10,6 +10,7 @@
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
 #include "qemu/cutils.h"
+#include "qemu/units.h"
 #include 
 #include 
 
@@ -44,8 +45,103 @@
 #define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xef13ULL)
 #define VFIO_MIG_FLAG_DEV_DATA_STATE(0xef14ULL)
 
+/*
+ * This is an arbitrary size based on migration of mlx5 devices, where typically
+ * total device migration size is on the order of 100s of MB. Testing with
+ * larger values, e.g. 128MB and 1GB, did not show a performance improvement.
+ */
+#define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)
+
 static int64_t bytes_transferred;
 
+static const char *mig_state_to_str(enum vfio_device_mig_state state)
+{
+switch (state) {
+case VFIO_DEVICE_STATE_ERROR:
+return "ERROR";
+case VFIO_DEVICE_STATE_STOP:
+return "STOP";
+case VFIO_DEVICE_STATE_RUNNING:
+return "RUNNING";
+case VFIO_DEVICE_STATE_STOP_COPY:
+return "STOP_COPY";
+case VFIO_DEVICE_STATE_RESUMING:
+return "RESUMING";
+case VFIO_DEVICE_STATE_RUNNING_P2P:
+return "RUNNING_P2P";
+default:
+return "UNKNOWN STATE";
+}
+}
+
+static int vfio_migration_set_state(VFIODevice *vbasedev,
+enum vfio_device_mig_state new_state,
+enum vfio_device_mig_state recover_state)
+{
+VFIOMigration *migration = vbasedev->migration;
+uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
+  sizeof(struct

[PATCH v7 13/13] docs/devel: Align VFIO migration docs to v2 protocol

2023-01-15 Thread Avihai Horon

Now that VFIO migration protocol v2 has been implemented and v1 protocol
has been removed, update the documentation according to v2 protocol.

Signed-off-by: Avihai Horon 
Reviewed-by: Cédric Le Goater 
---
 docs/devel/vfio-migration.rst | 68 ---
 1 file changed, 30 insertions(+), 38 deletions(-)

diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst
index 9ff6163c88..1d50c2fe5f 100644
--- a/docs/devel/vfio-migration.rst
+++ b/docs/devel/vfio-migration.rst
@@ -7,46 +7,39 @@ the guest is running on source host and restoring this saved 
state on the
 destination host. This document details how saving and restoring of VFIO
 devices is done in QEMU.
 
-Migration of VFIO devices consists of two phases: the optional pre-copy phase,
-and the stop-and-copy phase. The pre-copy phase is iterative and allows to
-accommodate VFIO devices that have a large amount of data that needs to be
-transferred. The iterative pre-copy phase of migration allows for the guest to
-continue whilst the VFIO device state is transferred to the destination, this
-helps to reduce the total downtime of the VM. VFIO devices can choose to skip
-the pre-copy phase of migration by returning pending_bytes as zero during the
-pre-copy phase.
+Migration of VFIO devices currently consists of a single stop-and-copy phase.
+During the stop-and-copy phase the guest is stopped and the entire VFIO device
+data is transferred to the destination.
+
+The pre-copy phase of migration is currently not supported for VFIO devices.
+Support for VFIO pre-copy will be added later on.
 
 A detailed description of the UAPI for VFIO device migration can be found in
-the comment for the ``vfio_device_migration_info`` structure in the header
-file linux-headers/linux/vfio.h.
+the comment for the ``vfio_device_mig_state`` structure in the header file
+linux-headers/linux/vfio.h.
 
 VFIO implements the device hooks for the iterative approach as follows:
 
-* A ``save_setup`` function that sets up the migration region and sets _SAVING
-  flag in the VFIO device state.
+* A ``save_setup`` function that sets up migration on the source.
 
-* A ``load_setup`` function that sets up the migration region on the
-  destination and sets _RESUMING flag in the VFIO device state.
+* A ``load_setup`` function that sets the VFIO device on the destination in
+  _RESUMING state.
 
 * A ``save_live_pending`` function that reads pending_bytes from the vendor
   driver, which indicates the amount of data that the vendor driver has yet to
   save for the VFIO device.
 
-* A ``save_live_iterate`` function that reads the VFIO device's data from the
-  vendor driver through the migration region during iterative phase.
-
 * A ``save_state`` function to save the device config space if it is present.
 
-* A ``save_live_complete_precopy`` function that resets _RUNNING flag from the
-  VFIO device state and iteratively copies the remaining data for the VFIO
-  device until the vendor driver indicates that no data remains (pending bytes
-  is zero).
+* A ``save_live_complete_precopy`` function that sets the VFIO device in
+  _STOP_COPY state and iteratively copies the data for the VFIO device until
+  the vendor driver indicates that no data remains.
 
 * A ``load_state`` function that loads the config section and the data
-  sections that are generated by the save functions above
+  sections that are generated by the save functions above.
 
 * ``cleanup`` functions for both save and load that perform any migration
-  related cleanup, including unmapping the migration region
+  related cleanup.
 
 
 The VFIO migration code uses a VM state change handler to change the VFIO
@@ -71,13 +64,13 @@ tracking can identify dirtied pages, but any page pinned by 
the vendor driver
 can also be written by the device. There is currently no device or IOMMU
 support for dirty page tracking in hardware.
 
-By default, dirty pages are tracked when the device is in pre-copy as well as
-stop-and-copy phase. So, a page pinned by the vendor driver will be copied to
-the destination in both phases. Copying dirty pages in pre-copy phase helps
-QEMU to predict if it can achieve its downtime tolerances. If QEMU during
-pre-copy phase keeps finding dirty pages continuously, then it understands
-that even in stop-and-copy phase, it is likely to find dirty pages and can
-predict the downtime accordingly.
+By default, dirty pages are tracked during pre-copy as well as stop-and-copy
+phase. So, a page pinned by the vendor driver will be copied to the destination
+in both phases. Copying dirty pages in pre-copy phase helps QEMU to predict if
+it can achieve its downtime tolerances. If QEMU during pre-copy phase keeps
+finding dirty pages continuously, then it understands that even in stop-and-copy
+phase, it is likely to find dirty pages and can predict the downtime
+accordingly.
 
 QEMU also provides a per device opt-out option ``pre-copy-dirty-page-tracking``
 which disables

[PATCH v7 10/13] vfio/migration: Optimize vfio_save_pending()

2023-01-15 Thread Avihai Horon

During pre-copy phase of migration vfio_save_pending() is called
repeatedly and queries the VFIO device for its pending data size.

As long as pending RAM size is over the threshold, migration can't
converge and be completed. Therefore, during this time there is no
point in querying the VFIO device pending data size.

Avoid these unnecessary queries by issuing them in a RAM pre-copy
notifier instead of vfio_save_pending().

This way the VFIO device is queried only when RAM pending data is
below the threshold, when there is an actual chance for migration to
converge.

Signed-off-by: Avihai Horon 
Reviewed-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-common.h |  2 ++
 hw/vfio/migration.c   | 56 +++
 hw/vfio/trace-events  |  1 +
 3 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 2ec3346fea..113f8d9208 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -65,11 +65,13 @@ typedef struct VFIOMigration {
 uint32_t device_state_v1;
 int vm_running;
 Notifier migration_state;
+NotifierWithReturn migration_data;
 uint64_t pending_bytes;
 enum vfio_device_mig_state device_state;
 int data_fd;
 void *data_buffer;
 size_t data_buffer_size;
+uint64_t stop_copy_size;
 bool v2;
 } VFIOMigration;
 
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index f19ada0f4f..87ef2b44ef 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -655,29 +655,19 @@ static void vfio_v1_save_cleanup(void *opaque)
 trace_vfio_save_cleanup(vbasedev->name);
 }
 
-/*
- * Migration size of VFIO devices can be as little as a few KBs or as big as
- * many GBs. This value should be big enough to cover the worst case.
- */
-#define VFIO_MIG_STOP_COPY_SIZE (100 * GiB)
 static void vfio_save_pending(void *opaque, uint64_t threshold_size,
   uint64_t *res_precopy_only,
   uint64_t *res_compatible,
   uint64_t *res_postcopy_only)
 {
 VFIODevice *vbasedev = opaque;
-uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE;
+VFIOMigration *migration = vbasedev->migration;
 
-/*
- * If getting pending migration size fails, VFIO_MIG_STOP_COPY_SIZE is
- * reported so downtime limit won't be violated.
- */
-vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
-*res_precopy_only += stop_copy_size;
+*res_precopy_only += migration->stop_copy_size;
 
 trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
 *res_postcopy_only, *res_compatible,
-stop_copy_size);
+migration->stop_copy_size);
 }
 
 static void vfio_v1_save_pending(void *opaque, uint64_t threshold_size,
@@ -1104,6 +1094,40 @@ static void vfio_migration_state_notifier(Notifier 
*notifier, void *data)
 }
 }
 
+/*
+ * Migration size of VFIO devices can be as little as a few KBs or as big as
+ * many GBs. This value should be big enough to cover the worst case.
+ */
+#define VFIO_MIG_STOP_COPY_SIZE (100 * GiB)
+static int vfio_migration_data_notifier(NotifierWithReturn *n, void *data)
+{
+VFIOMigration *migration = container_of(n, VFIOMigration, migration_data);
+VFIODevice *vbasedev = migration->vbasedev;
+PrecopyNotifyData *pnd = data;
+
+if (pnd->reason != PRECOPY_NOTIFY_AFTER_BITMAP_SYNC) {
+return 0;
+}
+
+/* No need to get pending size when finishing migration */
+if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
+return 0;
+}
+
+if (vfio_query_stop_copy_size(vbasedev, &migration->stop_copy_size)) {
+/*
+ * Failed to get pending migration size. Report big pending size so
+ * downtime limit won't be violated.
+ */
+migration->stop_copy_size = VFIO_MIG_STOP_COPY_SIZE;
+}
+
+trace_vfio_migration_data_notifier(vbasedev->name,
+   migration->stop_copy_size);
+
+return 0;
+}
+
 static void vfio_migration_exit(VFIODevice *vbasedev)
 {
 VFIOMigration *migration = vbasedev->migration;
@@ -1225,6 +1249,9 @@ static int vfio_migration_init(VFIODevice *vbasedev)
 
 migration->vm_state = qdev_add_vm_change_state_handler(
 vbasedev->dev, vfio_vmstate_change, vbasedev);
+
+migration->migration_data.notify = vfio_migration_data_notifier;
+precopy_add_notifier(&migration->migration_data);
 } else {
 register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1,
  &savevm_vfio_v1_handlers, vbasedev);
@@ -1283,6 +1310,9 @@ void vfio_migration_finalize(VFIODevice *vbasedev)
 if (vbasedev->migration) {
 VFIOMigration *migration = vbasedev->migration;
 
+if (migration->v2) {
+precopy_remove_notifier(&migration->migration_data);
+}

[PATCH v7 08/13] vfio/migration: Rename functions/structs related to v1 protocol

2023-01-15 Thread Avihai Horon

To avoid name collisions, rename functions and structs related to VFIO
migration protocol v1. This will allow the two protocols to co-exist
when v2 protocol is added, until v1 is removed. No functional changes
intended.

Signed-off-by: Avihai Horon 
Reviewed-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-common.h |   2 +-
 hw/vfio/common.c  |   6 +-
 hw/vfio/migration.c   | 106 +-
 hw/vfio/trace-events  |  12 ++--
 4 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index e573f5a9f1..bbaf72ba00 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -62,7 +62,7 @@ typedef struct VFIOMigration {
 struct VFIODevice *vbasedev;
 VMChangeStateEntry *vm_state;
 VFIORegion region;
-uint32_t device_state;
+uint32_t device_state_v1;
 int vm_running;
 Notifier migration_state;
 uint64_t pending_bytes;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 3a35f4afad..550b2d7ded 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -355,8 +355,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer 
*container)
 return false;
 }
 
-if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF)
-&& (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) {
+if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
+(migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING)) {
 return false;
 }
 }
@@ -385,7 +385,7 @@ static bool 
vfio_devices_all_running_and_mig_active(VFIOContainer *container)
 return false;
 }
 
-if (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING) {
+if (migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING) {
 continue;
 } else {
 return false;
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 977da64411..9df859f4d3 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -107,8 +107,8 @@ static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, 
size_t count,
  * an error is returned.
  */
 
-static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
-uint32_t value)
+static int vfio_migration_v1_set_state(VFIODevice *vbasedev, uint32_t mask,
+   uint32_t value)
 {
 VFIOMigration *migration = vbasedev->migration;
 VFIORegion *region = &migration->region;
@@ -145,8 +145,8 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, 
uint32_t mask,
 return ret;
 }
 
-migration->device_state = device_state;
-trace_vfio_migration_set_state(vbasedev->name, device_state);
+migration->device_state_v1 = device_state;
+trace_vfio_migration_v1_set_state(vbasedev->name, device_state);
 return 0;
 }
 
@@ -260,8 +260,8 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice 
*vbasedev, uint64_t *size)
 return ret;
 }
 
-static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
-uint64_t data_size)
+static int vfio_v1_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
+   uint64_t data_size)
 {
 VFIORegion *region = &vbasedev->migration->region;
 uint64_t data_offset = 0, size, report_size;
@@ -288,7 +288,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice 
*vbasedev,
 data_size = 0;
 }
 
-trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
+trace_vfio_v1_load_state_device_data(vbasedev->name, data_offset, size);
 
 while (size) {
 void *buf;
@@ -394,7 +394,7 @@ static int vfio_load_device_config_state(QEMUFile *f, void 
*opaque)
 return qemu_file_get_error(f);
 }
 
-static void vfio_migration_cleanup(VFIODevice *vbasedev)
+static void vfio_migration_v1_cleanup(VFIODevice *vbasedev)
 {
 VFIOMigration *migration = vbasedev->migration;
 
@@ -405,13 +405,13 @@ static void vfio_migration_cleanup(VFIODevice *vbasedev)
 
 /* -- */
 
-static int vfio_save_setup(QEMUFile *f, void *opaque)
+static int vfio_v1_save_setup(QEMUFile *f, void *opaque)
 {
 VFIODevice *vbasedev = opaque;
 VFIOMigration *migration = vbasedev->migration;
 int ret;
 
-trace_vfio_save_setup(vbasedev->name);
+trace_vfio_v1_save_setup(vbasedev->name);
 
 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
 
@@ -431,8 +431,8 @@ static int vfio_save_setup(QEMUFile *f, void *opaque)
 }
 }
 
-ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
-   VFIO_DEVICE_STATE_V1_SAVING);
+ret = vfio_migration_v1_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
+

[PATCH v7 12/13] vfio: Alphabetize migration section of VFIO trace-events file

2023-01-15 Thread Avihai Horon

Sort the migration section of VFIO trace events file alphabetically
and move two misplaced traces to common.c section.

Signed-off-by: Avihai Horon 
Reviewed-by: Cédric Le Goater 
---
 hw/vfio/trace-events | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 60c49b2ecf..db9cb94952 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -119,6 +119,8 @@ vfio_region_sparse_mmap_header(const char *name, int index, 
int nr_areas) "Devic
 vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) 
"sparse entry %d [0x%lx - 0x%lx]"
 vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t 
subtype) "%s index %d, %08x/%0x8"
 vfio_dma_unmap_overflow_workaround(void) ""
+vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t 
bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 
0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64
+vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu 
dirty @ 0x%"PRIx64" - 0x%"PRIx64
 
 # platform.c
 vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group 
#%d"
@@ -148,20 +150,18 @@ vfio_display_edid_update(uint32_t prefx, uint32_t prefy) 
"%ux%u"
 vfio_display_edid_write_error(void) ""
 
 # migration.c
+vfio_load_cleanup(const char *name) " (%s)"
+vfio_load_device_config_state(const char *name) " (%s)"
+vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
+vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " 
(%s) size 0x%"PRIx64" ret %d"
+vfio_migration_data_notifier(const char *name, uint64_t stopcopy_size) " (%s) 
stopcopy size 0x%"PRIx64
 vfio_migration_probe(const char *name) " (%s)"
 vfio_migration_set_state(const char *name, const char *state) " (%s) state %s"
-vfio_vmstate_change(const char *name, int running, const char *reason, const 
char *dev_state) " (%s) running %d reason %s device state %s"
 vfio_migration_state_notifier(const char *name, const char *state) " (%s) 
state %s"
-vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data 
buffer size 0x%"PRIx64
+vfio_save_block(const char *name, int data_size) " (%s) data_size %d"
 vfio_save_cleanup(const char *name) " (%s)"
+vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
 vfio_save_device_config_state(const char *name) " (%s)"
 vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, 
uint64_t compatible, uint64_t stopcopy_size) " (%s) precopy 0x%"PRIx64" 
postcopy 0x%"PRIx64" compatible 0x%"PRIx64" stopcopy size 0x%"PRIx64
-vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
-vfio_load_device_config_state(const char *name) " (%s)"
-vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
-vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " 
(%s) size 0x%"PRIx64" ret %d"
-vfio_load_cleanup(const char *name) " (%s)"
-vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t 
bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 
0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64
-vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu 
dirty @ 0x%"PRIx64" - 0x%"PRIx64
-vfio_save_block(const char *name, int data_size) " (%s) data_size %d"
-vfio_migration_data_notifier(const char *name, uint64_t stopcopy_size) " (%s) 
stopcopy size 0x%"PRIx64
+vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data 
buffer size 0x%"PRIx64
+vfio_vmstate_change(const char *name, int running, const char *reason, const 
char *dev_state) " (%s) running %d reason %s device state %s"
-- 
2.26.3

[PATCH v7 11/13] vfio/migration: Remove VFIO migration protocol v1

2023-01-15 Thread Avihai Horon

Now that v2 protocol implementation has been added, remove the
deprecated v1 implementation.

Signed-off-by: Avihai Horon 
Reviewed-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-common.h |   5 -
 hw/vfio/common.c  |  19 +-
 hw/vfio/migration.c   | 703 +-
 hw/vfio/trace-events  |   9 -
 4 files changed, 24 insertions(+), 712 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 113f8d9208..2aba45887c 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -61,18 +61,13 @@ typedef struct VFIORegion {
 typedef struct VFIOMigration {
 struct VFIODevice *vbasedev;
 VMChangeStateEntry *vm_state;
-VFIORegion region;
-uint32_t device_state_v1;
-int vm_running;
 Notifier migration_state;
 NotifierWithReturn migration_data;
-uint64_t pending_bytes;
 enum vfio_device_mig_state device_state;
 int data_fd;
 void *data_buffer;
 size_t data_buffer_size;
 uint64_t stop_copy_size;
-bool v2;
 } VFIOMigration;
 
 typedef struct VFIOAddressSpace {
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index dcaa77d2a8..9a0dbee6b4 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -355,14 +355,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer 
*container)
 return false;
 }
 
-if (!migration->v2 &&
-(vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
-(migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING)) {
-return false;
-}
-
-if (migration->v2 &&
-(vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
+if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
 (migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
  migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P)) {
 return false;
@@ -393,14 +386,8 @@ static bool 
vfio_devices_all_running_and_mig_active(VFIOContainer *container)
 return false;
 }
 
-if (!migration->v2 &&
-migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING) {
-continue;
-}
-
-if (migration->v2 &&
-(migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
- migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P)) {
+if (migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
+migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P) {
 continue;
 } else {
 return false;
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 87ef2b44ef..7489fcb03a 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -142,220 +142,6 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
 return 0;
 }
 
-static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
-  off_t off, bool iswrite)
-{
-int ret;
-
-ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
-pread(vbasedev->fd, val, count, off);
-if (ret < count) {
-error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
- HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
- vbasedev->name, off, strerror(errno));
-return (ret < 0) ? ret : -EINVAL;
-}
-return 0;
-}
-
-static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
-   off_t off, bool iswrite)
-{
-int ret, done = 0;
-__u8 *tbuf = buf;
-
-while (count) {
-int bytes = 0;
-
-if (count >= 8 && !(off % 8)) {
-bytes = 8;
-} else if (count >= 4 && !(off % 4)) {
-bytes = 4;
-} else if (count >= 2 && !(off % 2)) {
-bytes = 2;
-} else {
-bytes = 1;
-}
-
-ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
-if (ret) {
-return ret;
-}
-
-count -= bytes;
-done += bytes;
-off += bytes;
-tbuf += bytes;
-}
-return done;
-}
-
-#define vfio_mig_read(f, v, c, o)   vfio_mig_rw(f, (__u8 *)v, c, o, false)
-#define vfio_mig_write(f, v, c, o)  vfio_mig_rw(f, (__u8 *)v, c, o, true)
-
-#define VFIO_MIG_STRUCT_OFFSET(f)   \
- offsetof(struct vfio_device_migration_info, f)
-/*
- * Change the device_state register for device @vbasedev. Bits set in @mask
- * are preserved, bits set in @value are set, and bits not set in either @mask
- * or @value are cleared in device_state. If the register cannot be accessed,
- * the resulting state would be invalid, or the device enters an error state,
- * an error is returned.
- */
-
-static int vfio_migration_v1_set_state(VFIODevice

[PATCH v7 04/13] vfio/migration: Allow migration without VFIO IOMMU dirty tracking support

2023-01-15 Thread Avihai Horon

Currently, if IOMMU of a VFIO container doesn't support dirty page
tracking, migration is blocked. This is because a DMA-able VFIO device
can dirty RAM pages without updating QEMU about it, thus breaking the
migration.

However, this doesn't mean that migration can't be done at all.
In such case, allow migration and let QEMU VFIO code mark all pages
dirty.

This guarantees that all pages that might have gotten dirty are reported
back, and thus guarantees a valid migration even without VFIO IOMMU
dirty tracking support.

The motivation for this patch is the introduction of iommufd [1].
iommufd can directly implement the /dev/vfio/vfio container IOCTLs by
mapping them into its internal ops, allowing the usage of these IOCTLs
over iommufd. However, VFIO IOMMU dirty tracking is not supported by
this VFIO compatibility API.

This patch will allow migration by hosts that use the VFIO compatibility
API and prevent migration regressions caused by the lack of VFIO IOMMU
dirty tracking support.

[1]
https://lore.kernel.org/kvm/0-v6-a196d26f289e+11787-iommufd_...@nvidia.com/

Signed-off-by: Avihai Horon 
---
 hw/vfio/common.c| 20 ++--
 hw/vfio/migration.c |  3 +--
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 130e5d1dc7..f6dd571549 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -488,6 +488,12 @@ static int vfio_dma_unmap(VFIOContainer *container,
 return -errno;
 }
 
+if (iotlb && vfio_devices_all_running_and_saving(container)) {
+cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
+tcg_enabled() ? DIRTY_CLIENTS_ALL :
+DIRTY_CLIENTS_NOCODE);
+}
+
 return 0;
 }
 
@@ -1201,6 +1207,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer 
*container, bool start)
 .argsz = sizeof(dirty),
 };
 
+if (!container->dirty_pages_supported) {
+return;
+}
+
 if (start) {
 dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
 } else {
@@ -1236,6 +1246,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer 
*container, uint64_t iova,
 uint64_t pages;
 int ret;
 
+if (!container->dirty_pages_supported) {
+cpu_physical_memory_set_dirty_range(ram_addr, size,
+tcg_enabled() ? DIRTY_CLIENTS_ALL :
+DIRTY_CLIENTS_NOCODE);
+return 0;
+}
+
 dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
 
 dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
@@ -1409,8 +1426,7 @@ static void vfio_listener_log_sync(MemoryListener 
*listener,
 {
 VFIOContainer *container = container_of(listener, VFIOContainer, listener);
 
-if (vfio_listener_skipped_section(section) ||
-!container->dirty_pages_supported) {
+if (vfio_listener_skipped_section(section)) {
 return;
 }
 
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 09fe7c1de2..552c2313b2 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -860,11 +860,10 @@ int64_t vfio_mig_bytes_transferred(void)
 
 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
 {
-VFIOContainer *container = vbasedev->group->container;
 struct vfio_region_info *info = NULL;
 int ret = -ENOTSUP;
 
-if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
+if (!vbasedev->enable_migration) {
 goto add_blocker;
 }
 
-- 
2.26.3

[PATCH v7 05/13] migration/qemu-file: Add qemu_file_get_to_fd()

2023-01-15 Thread Avihai Horon

Add new function qemu_file_get_to_fd() that allows reading data from
QEMUFile and writing it straight into a given fd.

This will be used later in VFIO migration code.

Signed-off-by: Avihai Horon 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Cédric Le Goater 
---
 migration/qemu-file.h |  1 +
 migration/qemu-file.c | 34 ++
 2 files changed, 35 insertions(+)

diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index fa13d04d78..9d0155a2a1 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -148,6 +148,7 @@ int qemu_file_shutdown(QEMUFile *f);
 QEMUFile *qemu_file_get_return_path(QEMUFile *f);
 void qemu_fflush(QEMUFile *f);
 void qemu_file_set_blocking(QEMUFile *f, bool block);
+int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size);
 
 void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
 void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 2d5f74ffc2..102ab3b439 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -940,3 +940,37 @@ QIOChannel *qemu_file_get_ioc(QEMUFile *file)
 {
 return file->ioc;
 }
+
+/*
+ * Read size bytes from QEMUFile f and write them to fd.
+ */
+int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size)
+{
+while (size) {
+size_t pending = f->buf_size - f->buf_index;
+ssize_t rc;
+
+if (!pending) {
+rc = qemu_fill_buffer(f);
+if (rc < 0) {
+return rc;
+}
+if (rc == 0) {
+return -EIO;
+}
+continue;
+}
+
+rc = write(fd, f->buf + f->buf_index, MIN(pending, size));
+if (rc < 0) {
+return -errno;
+}
+if (rc == 0) {
+return -EIO;
+}
+f->buf_index += rc;
+size -= rc;
+}
+
+return 0;
+}
-- 
2.26.3

[PATCH v7 02/13] migration: No save_live_pending() method uses the QEMUFile parameter

2023-01-15 Thread Avihai Horon

From: Juan Quintela 

So remove it everywhere.

Signed-off-by: Juan Quintela 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Dr. David Alan Gilbert 
---
 include/migration/register.h   | 3 +--
 migration/savevm.h | 3 +--
 hw/s390x/s390-stattrib.c   | 2 +-
 hw/vfio/migration.c| 3 +--
 migration/block-dirty-bitmap.c | 3 +--
 migration/block.c  | 2 +-
 migration/migration.c  | 4 ++--
 migration/ram.c| 2 +-
 migration/savevm.c | 7 +++
 9 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index c1dcff0f90..eb6266a877 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -46,8 +46,7 @@ typedef struct SaveVMHandlers {
 
 /* This runs outside the iothread lock!  */
 int (*save_setup)(QEMUFile *f, void *opaque);
-void (*save_live_pending)(QEMUFile *f, void *opaque,
-  uint64_t threshold_size,
+void (*save_live_pending)(void *opaque, uint64_t threshold_size,
   uint64_t *res_precopy_only,
   uint64_t *res_compatible,
   uint64_t *res_postcopy_only);
diff --git a/migration/savevm.h b/migration/savevm.h
index 6461342cb4..6dec468cc3 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -40,8 +40,7 @@ void qemu_savevm_state_cleanup(void);
 void qemu_savevm_state_complete_postcopy(QEMUFile *f);
 int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
bool inactivate_disks);
-void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
-   uint64_t *res_precopy_only,
+void qemu_savevm_state_pending(uint64_t max_size, uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c
index 9eda1c3b2a..a553a1e850 100644
--- a/hw/s390x/s390-stattrib.c
+++ b/hw/s390x/s390-stattrib.c
@@ -182,7 +182,7 @@ static int cmma_save_setup(QEMUFile *f, void *opaque)
 return 0;
 }
 
-static void cmma_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
+static void cmma_save_pending(void *opaque, uint64_t max_size,
   uint64_t *res_precopy_only,
   uint64_t *res_compatible,
   uint64_t *res_postcopy_only)
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index c74453e0b5..e1413ac90c 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -456,8 +456,7 @@ static void vfio_save_cleanup(void *opaque)
 trace_vfio_save_cleanup(vbasedev->name);
 }
 
-static void vfio_save_pending(QEMUFile *f, void *opaque,
-  uint64_t threshold_size,
+static void vfio_save_pending(void *opaque, uint64_t threshold_size,
   uint64_t *res_precopy_only,
   uint64_t *res_compatible,
   uint64_t *res_postcopy_only)
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 283017d7d3..ffc433cd11 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -761,8 +761,7 @@ static int dirty_bitmap_save_complete(QEMUFile *f, void 
*opaque)
 return 0;
 }
 
-static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
-  uint64_t max_size,
+static void dirty_bitmap_save_pending(void *opaque, uint64_t max_size,
   uint64_t *res_precopy_only,
   uint64_t *res_compatible,
   uint64_t *res_postcopy_only)
diff --git a/migration/block.c b/migration/block.c
index 4347da1526..b6a98caf78 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -862,7 +862,7 @@ static int block_save_complete(QEMUFile *f, void *opaque)
 return 0;
 }
 
-static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
+static void block_save_pending(void *opaque, uint64_t max_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
diff --git a/migration/migration.c b/migration/migration.c
index 52b5d39244..9795d0ec5c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3751,8 +3751,8 @@ static MigIterateState 
migration_iteration_run(MigrationState *s)
 uint64_t pending_size, pend_pre, pend_compat, pend_post;
 bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
 
-qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, _pre,
-  _compat, _post);
+

[PATCH v7 07/13] vfio/migration: Move migration v1 logic to vfio_migration_init()

2023-01-15 Thread Avihai Horon

Move vfio_dev_get_region_info() logic from vfio_migration_probe() to
vfio_migration_init(). This logic is specific to v1 protocol and moving
it will make it easier to add the v2 protocol implementation later.
No functional changes intended.

Signed-off-by: Avihai Horon 
Reviewed-by: Cédric Le Goater 
---
 hw/vfio/migration.c  | 30 +++---
 hw/vfio/trace-events |  2 +-
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 552c2313b2..977da64411 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -788,14 +788,14 @@ static void vfio_migration_exit(VFIODevice *vbasedev)
 vbasedev->migration = NULL;
 }
 
-static int vfio_migration_init(VFIODevice *vbasedev,
-   struct vfio_region_info *info)
+static int vfio_migration_init(VFIODevice *vbasedev)
 {
 int ret;
 Object *obj;
 VFIOMigration *migration;
 char id[256] = "";
 g_autofree char *path = NULL, *oid = NULL;
+struct vfio_region_info *info;
 
 if (!vbasedev->ops->vfio_get_object) {
 return -EINVAL;
@@ -806,6 +806,14 @@ static int vfio_migration_init(VFIODevice *vbasedev,
 return -EINVAL;
 }
 
+ret = vfio_get_dev_region_info(vbasedev,
+   VFIO_REGION_TYPE_MIGRATION_DEPRECATED,
+   VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED,
+   );
+if (ret) {
+return ret;
+}
+
 vbasedev->migration = g_new0(VFIOMigration, 1);
 vbasedev->migration->device_state = VFIO_DEVICE_STATE_V1_RUNNING;
 vbasedev->migration->vm_running = runstate_is_running();
@@ -825,6 +833,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
 goto err;
 }
 
+g_free(info);
+
 migration = vbasedev->migration;
 migration->vbasedev = vbasedev;
 
@@ -847,6 +857,7 @@ static int vfio_migration_init(VFIODevice *vbasedev,
 return 0;
 
 err:
+g_free(info);
 vfio_migration_exit(vbasedev);
 return ret;
 }
@@ -860,34 +871,23 @@ int64_t vfio_mig_bytes_transferred(void)
 
 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
 {
-struct vfio_region_info *info = NULL;
 int ret = -ENOTSUP;
 
 if (!vbasedev->enable_migration) {
 goto add_blocker;
 }
 
-ret = vfio_get_dev_region_info(vbasedev,
-   VFIO_REGION_TYPE_MIGRATION_DEPRECATED,
-   VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED,
-   );
-if (ret) {
-goto add_blocker;
-}
-
-ret = vfio_migration_init(vbasedev, info);
+ret = vfio_migration_init(vbasedev);
 if (ret) {
 goto add_blocker;
 }
 
-trace_vfio_migration_probe(vbasedev->name, info->index);
-g_free(info);
+trace_vfio_migration_probe(vbasedev->name);
 return 0;
 
 add_blocker:
 error_setg(>migration_blocker,
"VFIO device doesn't support migration");
-g_free(info);
 
 ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
 if (ret < 0) {
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 73dffe9e00..b259dcc644 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -148,7 +148,7 @@ vfio_display_edid_update(uint32_t prefx, uint32_t prefy) 
"%ux%u"
 vfio_display_edid_write_error(void) ""
 
 # migration.c
-vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
+vfio_migration_probe(const char *name) " (%s)"
 vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
 vfio_vmstate_change(const char *name, int running, const char *reason, 
uint32_t dev_state) " (%s) running %d reason %s device state %d"
 vfio_migration_state_notifier(const char *name, const char *state) " (%s) 
state %s"
-- 
2.26.3

[PATCH v7 03/13] vfio/migration: Fix NULL pointer dereference bug

2023-01-15 Thread Avihai Horon

As part of its error flow, vfio_vmstate_change() accesses
MigrationState->to_dst_file without any checks. This can cause a NULL
pointer dereference if the error flow is taken and
MigrationState->to_dst_file is not set.

For example, this can happen if the VM is started or stopped outside of
migration and the vfio_vmstate_change() error flow is taken, as
MigrationState->to_dst_file is not set at that time.

Fix it by checking that MigrationState->to_dst_file is set before using
it.

Fixes: 02a7e71b1e5b ("vfio: Add VM state change handler to know state of VM")
Signed-off-by: Avihai Horon 
Reviewed-by: Juan Quintela 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 hw/vfio/migration.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index e1413ac90c..09fe7c1de2 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -743,7 +743,9 @@ static void vfio_vmstate_change(void *opaque, bool running, 
RunState state)
  */
 error_report("%s: Failed to set device state 0x%x", vbasedev->name,
  (migration->device_state & mask) | value);
-qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
+if (migrate_get_current()->to_dst_file) {
+qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
+}
 }
 vbasedev->migration->vm_running = running;
 trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
-- 
2.26.3

[PATCH v7 06/13] vfio/common: Change vfio_devices_all_running_and_saving() logic to equivalent one

2023-01-15 Thread Avihai Horon

vfio_devices_all_running_and_saving() is used to check if migration is
in pre-copy phase. This is done by checking if migration is in setup or
active states and if all VFIO devices are in pre-copy state, i.e.
_SAVING | _RUNNING.

In VFIO migration protocol v2 pre-copy support is made optional. Hence,
a matching v2 protocol pre-copy state can't be used here.

As preparation for adding v2 protocol, change
vfio_devices_all_running_and_saving() logic such that it doesn't use the
VFIO pre-copy state.

The new equivalent logic checks if migration is in active state and if
all VFIO devices are in running state [1]. No functional changes
intended.

[1] Note that checking if migration is in setup or active states and if
all VFIO devices are in running state doesn't guarantee that we are in
pre-copy phase, thus we check if migration is only in active state.

Signed-off-by: Avihai Horon 
Reviewed-by: Cédric Le Goater 
---
 hw/vfio/common.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index f6dd571549..3a35f4afad 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -40,6 +40,7 @@
 #include "trace.h"
 #include "qapi/error.h"
 #include "migration/migration.h"
+#include "migration/misc.h"
 #include "sysemu/tpm.h"
 
 VFIOGroupList vfio_group_list =
@@ -363,13 +364,16 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer 
*container)
 return true;
 }
 
-static bool vfio_devices_all_running_and_saving(VFIOContainer *container)
+/*
+ * Check if all VFIO devices are running and migration is active, which is
+ * essentially equivalent to the migration being in pre-copy phase.
+ */
+static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
 {
 VFIOGroup *group;
 VFIODevice *vbasedev;
-MigrationState *ms = migrate_get_current();
 
-if (!migration_is_setup_or_active(ms->state)) {
+if (!migration_is_active(migrate_get_current())) {
 return false;
 }
 
@@ -381,8 +385,7 @@ static bool 
vfio_devices_all_running_and_saving(VFIOContainer *container)
 return false;
 }
 
-if ((migration->device_state & VFIO_DEVICE_STATE_V1_SAVING) &&
-(migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) {
+if (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING) {
 continue;
 } else {
 return false;
@@ -461,7 +464,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
 };
 
 if (iotlb && container->dirty_pages_supported &&
-vfio_devices_all_running_and_saving(container)) {
+vfio_devices_all_running_and_mig_active(container)) {
 return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
 }
 
@@ -488,7 +491,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
 return -errno;
 }
 
-if (iotlb && vfio_devices_all_running_and_saving(container)) {
+if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
 cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
 tcg_enabled() ? DIRTY_CLIENTS_ALL :
 DIRTY_CLIENTS_NOCODE);
-- 
2.26.3

[PATCH v7 01/13] linux-headers: Update to v6.2-rc1

2023-01-15 Thread Avihai Horon

Update to commit 1b929c02afd3 ("Linux 6.2-rc1").

Signed-off-by: Avihai Horon 
---
 include/standard-headers/drm/drm_fourcc.h |  63 +++-
 include/standard-headers/linux/ethtool.h  |  81 -
 include/standard-headers/linux/fuse.h |  20 +-
 .../linux/input-event-codes.h |   4 +
 include/standard-headers/linux/pci_regs.h |   2 +
 include/standard-headers/linux/virtio_blk.h   |  19 ++
 include/standard-headers/linux/virtio_bt.h|   8 +
 include/standard-headers/linux/virtio_net.h   |   4 +
 linux-headers/asm-arm64/kvm.h |   1 +
 linux-headers/asm-generic/hugetlb_encode.h|  26 +-
 linux-headers/asm-generic/mman-common.h   |   2 +
 linux-headers/asm-mips/mman.h |   2 +
 linux-headers/asm-riscv/kvm.h |   7 +
 linux-headers/asm-x86/kvm.h   |  11 +-
 linux-headers/linux/kvm.h |  32 +-
 linux-headers/linux/psci.h|  14 +
 linux-headers/linux/userfaultfd.h |   4 +
 linux-headers/linux/vfio.h| 278 +-
 18 files changed, 522 insertions(+), 56 deletions(-)

diff --git a/include/standard-headers/drm/drm_fourcc.h 
b/include/standard-headers/drm/drm_fourcc.h
index 48b620cbef..69cab17b38 100644
--- a/include/standard-headers/drm/drm_fourcc.h
+++ b/include/standard-headers/drm/drm_fourcc.h
@@ -98,18 +98,42 @@ extern "C" {
 #define DRM_FORMAT_INVALID 0
 
 /* color index */
+#define DRM_FORMAT_C1  fourcc_code('C', '1', ' ', ' ') /* [7:0] 
C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */
+#define DRM_FORMAT_C2  fourcc_code('C', '2', ' ', ' ') /* [7:0] 
C0:C1:C2:C3 2:2:2:2 four pixels/byte */
+#define DRM_FORMAT_C4  fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 
4:4 two pixels/byte */
 #define DRM_FORMAT_C8  fourcc_code('C', '8', ' ', ' ') /* [7:0] C */
 
-/* 8 bpp Red */
+/* 1 bpp Darkness (inverse relationship between channel value and brightness) 
*/
+#define DRM_FORMAT_D1  fourcc_code('D', '1', ' ', ' ') /* [7:0] 
D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Darkness (inverse relationship between channel value and brightness) 
*/
+#define DRM_FORMAT_D2  fourcc_code('D', '2', ' ', ' ') /* [7:0] 
D0:D1:D2:D3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Darkness (inverse relationship between channel value and brightness) 
*/
+#define DRM_FORMAT_D4  fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 
4:4 two pixels/byte */
+
+/* 8 bpp Darkness (inverse relationship between channel value and brightness) 
*/
+#define DRM_FORMAT_D8  fourcc_code('D', '8', ' ', ' ') /* [7:0] D */
+
+/* 1 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R1  fourcc_code('R', '1', ' ', ' ') /* [7:0] 
R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R2  fourcc_code('R', '2', ' ', ' ') /* [7:0] 
R0:R1:R2:R3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R4  fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 
4:4 two pixels/byte */
+
+/* 8 bpp Red (direct relationship between channel value and brightness) */
 #define DRM_FORMAT_R8  fourcc_code('R', '8', ' ', ' ') /* [7:0] R */
 
-/* 10 bpp Red */
+/* 10 bpp Red (direct relationship between channel value and brightness) */
 #define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 
6:10 little endian */
 
-/* 12 bpp Red */
+/* 12 bpp Red (direct relationship between channel value and brightness) */
 #define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 
4:12 little endian */
 
-/* 16 bpp Red */
+/* 16 bpp Red (direct relationship between channel value and brightness) */
 #define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R 
little endian */
 
 /* 16 bpp RG */
@@ -204,7 +228,9 @@ extern "C" {
 #define DRM_FORMAT_VYUYfourcc_code('V', 'Y', 'U', 'Y') /* 
[31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */
 
 #define DRM_FORMAT_AYUVfourcc_code('A', 'Y', 'U', 'V') /* 
[31:0] A:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_AVUYfourcc_code('A', 'V', 'U', 'Y') /* [31:0] 
A:Cr:Cb:Y 8:8:8:8 little endian */
 #define DRM_FORMAT_XYUVfourcc_code('X', 'Y', 'U', 'V') /* [31:0] 
X:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_XVUYfourcc_code('X', 'V', 'U', 'Y') /* [31:0] 
X:Cr:Cb:Y 8:8:8:8 little endian */
 #define DRM_FORMAT_VUY888  fourcc_code('V', 'U', '2', '4') /* [23:0] 
Cr:Cb:Y 8:8:8 little endian */
 #define DRM_FORMAT_VUY101010   fourcc_code('V', 'U', '3', '0') /* Y followed 
by U then V, 10:10:10. Non-linear modifier only */
 
@@ -717,6 +743,35 @@ extern "C" {
  */
 #define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE,

[PATCH v7 00/13] vfio/migration: Implement VFIO migration protocol v2

2023-01-15 Thread Avihai Horon

Hello,

Following VFIO migration protocol v2 acceptance in kernel, this series
implements VFIO migration according to the new v2 protocol and replaces
the now deprecated v1 implementation.

The main differences between v1 and v2 migration protocols are:
1. VFIO device state is represented as a finite state machine instead of
   a bitmap.

2. The migration interface with kernel is done using VFIO_DEVICE_FEATURE
   ioctl and normal read() and write() instead of the migration region
   used in v1.

3. Pre-copy is made optional in v2 protocol. Support for pre-copy will
   be added later on.

Full description of the v2 protocol and the differences from v1 can be
found here [1].



Patch list:

Patch 1 updates linux headers so we will have the MIG_DATA_SIZE ioctl.

Patches 2-8 are prep patches fixing bugs, adding a QEMUFile function
that will be used later and refactoring v1 protocol code to make it
easier to add v2 protocol.

Patches 9-13 implement v2 protocol and remove v1 protocol.

Thanks.



Changes from v6 [2]:
- Fixed another compilation error in patch #9 reported by Cedric.
- Added Reviewed-by tags.



Changes from v5 [3]:
- Dropped patch #3.
- Simplified patch #5 as per Alex's suggestion.
- Changed qemu_file_get_to_fd() to return -EIO instead of -1, as
  suggested by Cedric.
  Also changed it so now write returns -errno instead of -1 on error.
- Fixed compilation error reported by Cedric.
- Changed vfio_migration_query_flags() to print error message and return
  -errno in error case as suggested by Cedric.
- Added Reviewed-by tags.



Changes from v4 [4]:
- Rebased on latest master branch.
- Added linux header update to kernel v6.2-rc1.
- Merged preview patches (#13-14) into this series.



Changes from v3 [5]:
- Rebased on latest master branch.

- Dropped patch #1 "migration: Remove res_compatible parameter" as
  it's not mandatory to this series and needs some further discussion.

- Dropped patch #3 "migration: Block migration comment or code is
  wrong" as it has been merged already.

- Addressed overlooked corner case reported by Vladimir in patch #4
  "migration: Simplify migration_iteration_run()".

- Dropped patch #5 "vfio/migration: Fix wrong enum usage" as it has
  been merged already.

- In patch #12 "vfio/migration: Implement VFIO migration protocol v2":
  1. Changed vfio_save_pending() to update res_precopy_only instead of
 res_postcopy_only (as VFIO migration doesn’t support postcopy).
  2. Moved VFIOMigration->data_buffer allocation to vfio_save_setup()
 and its de-allocation to vfio_save_cleanup(), so now it's
 allocated when actually used (during migration and only on source
 side).

- Addressed Alex's comments:
  1. Eliminated code duplication in patch #7 "vfio/migration: Allow
 migration without VFIO IOMMU dirty tracking support".
  2. Removed redundant initialization of vfio_region_info in patch #10
 "vfio/migration: Move migration v1 logic to vfio_migration_init()".
  3. Added comment about VFIO_MIG_DATA_BUFFER_SIZE heuristic (and
 renamed to VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE).
  4. Cast migration structs to their actual types instead of void *.
  5. Return -errno and -EBADF instead of -1 in vfio_migration_set_state().
  6. Set migration->device_state to new_state even in case of data_fd
 out of sync. Although migration will be aborted, setting device
 state succeeded so we should reflect that.
  7. Renamed VFIO_MIG_PENDING_SIZE to VFIO_MIG_STOP_COPY_SIZE, set it
 to 100G and added a comment about the size choice.
  8. Changed vfio_save_block() to return -errno on error.
  9. Squashed Patch #14 to patch #12.
  10. Adjusted migration data buffer size according to MIG_DATA_SIZE
  ioctl.

- In preview patch #17 "vfio/migration: Query device data size in
  vfio_save_pending()" - changed vfio_save_pending() to report
  VFIO_MIG_STOP_COPY_SIZE on any error.
   
- Added another preview patch "vfio/migration: Optimize
  vfio_save_pending()".

- Added ret value on some traces as suggested by David.

- Added Reviewed-By tags.



Changes from v2 [6]:
- Rebased on top of latest master branch.

- Added relevant patches from Juan's RFC [7] with minor changes:
  1. Added Reviewed-by tag to patch #3 in the RFC.
  2. Adjusted patch #6 to work without patch #4 in the RFC.

- Added a new patch "vfio/migration: Fix wrong enum usage" that fixes a
  small bug in v1 code. This patch was sent a few weeks ago [8] but
  hasn't been taken yet.

- Patch #2 (vfio/migration: Skip pre-copy if dirty page tracking is not
  supported):
  1. Dropped this patch and replaced it with
 "vfio/migration: Allow migration without VFIO IOMMU dirty tracking
 support".
 The new patch uses a different approach – instead of skipping
 pre-copy phase completely, QEMU VFIO code will mark RAM dirty
 (instead of kernel). This ensures that current migration behavior
 is not changed and SLA is taken into account.

- Patch #4 (vfio/common: Change

[PATCH] vhost-user-fs: add capability to allow migration

2023-01-15 Thread Anton Kuchin

Currently, any vhost-user-fs device makes the VM unmigratable, which also
prevents updating qemu without stopping the VM. In most cases that makes
sense because qemu has no way to transfer FUSE session state.

But we can give the orchestrator an option to override this if it can
guarantee that the state will be preserved (e.g. it uses migration to
update qemu and dst will run on the same host as src and use the same
socket endpoints).

This patch keeps the default behavior of preventing migration with such
devices, but adds the migration capability 'vhost-user-fs' to explicitly
allow migration.

Signed-off-by: Anton Kuchin 
---
 hw/virtio/vhost-user-fs.c | 25 -
 qapi/migration.json   |  7 ++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index f5049735ac..13d920423e 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -24,6 +24,7 @@
 #include "hw/virtio/vhost-user-fs.h"
 #include "monitor/monitor.h"
 #include "sysemu/sysemu.h"
+#include "migration/migration.h"
 
 static const int user_feature_bits[] = {
 VIRTIO_F_VERSION_1,
@@ -298,9 +299,31 @@ static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
 return >vhost_dev;
 }
 
+static int vhost_user_fs_pre_save(void *opaque)
+{
+MigrationState *s = migrate_get_current();
+
+if (!s->enabled_capabilities[MIGRATION_CAPABILITY_VHOST_USER_FS]) {
+error_report("Migration of vhost-user-fs devices requires internal 
FUSE "
+ "state of backend to be preserved. If orchestrator can "
+ "guarantee this (e.g. dst connects to the same backend "
+ "instance or backend state is migrated) set 
'vhost-user-fs' "
+ "migration capability to true to enable migration.");
+return -1;
+}
+
+return 0;
+}
+
 static const VMStateDescription vuf_vmstate = {
 .name = "vhost-user-fs",
-.unmigratable = 1,
+.minimum_version_id = 0,
+.version_id = 0,
+.fields = (VMStateField[]) {
+VMSTATE_VIRTIO_DEVICE,
+VMSTATE_END_OF_LIST()
+},
+   .pre_save = vhost_user_fs_pre_save,
 };
 
 static Property vuf_properties[] = {
diff --git a/qapi/migration.json b/qapi/migration.json
index 88ecf86ac8..9a229ea884 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -477,6 +477,11 @@
 #will be handled faster.  This is a performance feature and
 #should not affect the correctness of postcopy migration.
 #(since 7.1)
+# @vhost-user-fs: If enabled, the migration process will allow migration of
+# vhost-user-fs devices, this should be enabled only when
+# backend can preserve local FUSE state e.g. for qemu update
+# when dst reconects to the same endpoints after migration.
+# (since 8.0)
 #
 # Features:
 # @unstable: Members @x-colo and @x-ignore-shared are experimental.
@@ -492,7 +497,7 @@
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
'validate-uuid', 'background-snapshot',
-   'zero-copy-send', 'postcopy-preempt'] }
+   'zero-copy-send', 'postcopy-preempt', 'vhost-user-fs'] }
 
 ##
 # @MigrationCapabilityStatus:
-- 
2.34.1

[PATCH] target/arm: Widen cnthctl_el2 to uint64_t

2023-01-15 Thread Richard Henderson

This is a 64-bit register on AArch64, even if the high 44 bits
are RES0.  Because this is defined as ARM_CP_STATE_BOTH, we are
asserting that the cpreg field is 64-bits.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1400
Signed-off-by: Richard Henderson 
---

During my peregrinations of reorganizing cpregs, I've been thinking
of ways to detect these sorts of errors -- preferably at build time,
but failing that at startup.  I think all raw usage of offsetof has
got to be replaced with something like

#define cpreg_fieldoffset(field) \
.fieldoffset = offsetof(CPUARMState, field), \
.fieldsize = sizeof(((CPUARMState *)0)->field),

I'm not going to touch this until Fabiano's --disable-tcg cleanup lands.

r~

---
 target/arm/cpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index bf2bce046d..1feb63b4d7 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -479,7 +479,7 @@ typedef struct CPUArchState {
 };
 uint64_t c14_cntfrq; /* Counter Frequency register */
 uint64_t c14_cntkctl; /* Timer Control register */
-uint32_t cnthctl_el2; /* Counter/Timer Hyp Control register */
+uint64_t cnthctl_el2; /* Counter/Timer Hyp Control register */
 uint64_t cntvoff_el2; /* Counter Virtual Offset register */
 ARMGenericTimer c14_timer[NUM_GTIMERS];
 uint32_t c15_cpar; /* XScale Coprocessor Access Register */
-- 
2.34.1

[PATCH v3] Windows installer: keep dependency cache

2023-01-15 Thread Arthur Sengileyev

It should be possible to reuse the cache built by a previous iteration
when processing the next executables. Dependencies that were already
processed are skipped later based on dll name.

Signed-off-by: Arthur Sengileyev 
Reviewed-by: Bin Meng 
---
 scripts/nsis.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/nsis.py b/scripts/nsis.py
index 03ed7608a2..7cffba70ff 100644
--- a/scripts/nsis.py
+++ b/scripts/nsis.py
@@ -91,12 +91,13 @@ def main():
 print("Searching '%s' for the dependent dlls ..." % search_path)
 dlldir = os.path.join(destdir + prefix, "dll")
 os.mkdir(dlldir)
+deps_cache = set()
 
 for exe in glob.glob(os.path.join(destdir + prefix, "*.exe")):
 signcode(exe)
 
 # find all dll dependencies
-deps = set(find_deps(exe, search_path, set()))
+deps = set(find_deps(exe, search_path, deps_cache))
 deps.remove(exe)
 
 # copy all dlls to the DLLDIR
-- 
2.39.0

Re: [PATCH 0/2] target/riscv: Fix double calls to gen_set_rm [#1411]

2023-01-15 Thread Richard Henderson


On 1/15/23 06:06, Richard Henderson wrote:

These double calls tickle an assertion in decode_save_opc,
and aren't efficient anyway.  Introduce a new helper to do
exactly what was desired.


Also #1339.


r~

[PATCH 2/2] target/riscv: Remove helper_set_rod_rounding_mode

2023-01-15 Thread Richard Henderson

The only setting of RISCV_FRM_ROD is from the vector unit,
and now handled by helper_set_rounding_mode_chkfrm.
This helper is now unused.

Signed-off-by: Richard Henderson 
---
 target/riscv/helper.h | 1 -
 target/riscv/fpu_helper.c | 5 -
 target/riscv/translate.c  | 4 
 3 files changed, 10 deletions(-)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 9792ab5086..58a30f03d6 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -4,7 +4,6 @@ DEF_HELPER_2(raise_exception, noreturn, env, i32)
 /* Floating Point - rounding mode */
 DEF_HELPER_FLAGS_2(set_rounding_mode, TCG_CALL_NO_WG, void, env, i32)
 DEF_HELPER_FLAGS_2(set_rounding_mode_chkfrm, TCG_CALL_NO_WG, void, env, i32)
-DEF_HELPER_FLAGS_1(set_rod_rounding_mode, TCG_CALL_NO_WG, void, env)
 
 /* Floating Point - fused */
 DEF_HELPER_FLAGS_4(fmadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c
index 96817df8ef..449d236df6 100644
--- a/target/riscv/fpu_helper.c
+++ b/target/riscv/fpu_helper.c
@@ -118,11 +118,6 @@ void helper_set_rounding_mode_chkfrm(CPURISCVState *env, 
uint32_t rm)
 set_float_rounding_mode(softrm, >fp_status);
 }
 
-void helper_set_rod_rounding_mode(CPURISCVState *env)
-{
-set_float_rounding_mode(float_round_to_odd, >fp_status);
-}
-
 static uint64_t do_fmadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2,
uint64_t rs3, int flags)
 {
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 493c3815e1..01cc30a365 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -672,10 +672,6 @@ static void gen_set_rm(DisasContext *ctx, int rm)
 }
 ctx->frm = rm;
 
-if (rm == RISCV_FRM_ROD) {
-gen_helper_set_rod_rounding_mode(cpu_env);
-return;
-}
 if (rm == RISCV_FRM_DYN) {
 /* The helper will return only if frm valid. */
 ctx->frm_valid = true;
-- 
2.34.1

[PATCH 1/2] target/riscv: Introduce helper_set_rounding_mode_chkfrm

2023-01-15 Thread Richard Henderson

The new helper always validates the contents of FRM, even
if the new rounding mode is not DYN.  This is required by
the vector unit.

Track whether we've validated FRM separately from whether
we've updated fp_status with a given rounding mode, so that
we can elide calls correctly.

This partially reverts d6c4d3f2a69 which attempted to do
the same thing, but with two calls to gen_set_rm(), which is
both inefficient and tickles an assertion in decode_save_opc.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1441
Signed-off-by: Richard Henderson 
---
 target/riscv/helper.h   |  1 +
 target/riscv/fpu_helper.c   | 37 +
 target/riscv/translate.c| 19 +
 target/riscv/insn_trans/trans_rvv.c.inc | 24 +++-
 4 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 227c7122ef..9792ab5086 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -3,6 +3,7 @@ DEF_HELPER_2(raise_exception, noreturn, env, i32)
 
 /* Floating Point - rounding mode */
 DEF_HELPER_FLAGS_2(set_rounding_mode, TCG_CALL_NO_WG, void, env, i32)
+DEF_HELPER_FLAGS_2(set_rounding_mode_chkfrm, TCG_CALL_NO_WG, void, env, i32)
 DEF_HELPER_FLAGS_1(set_rod_rounding_mode, TCG_CALL_NO_WG, void, env)
 
 /* Floating Point - fused */
diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c
index 5699c9517f..96817df8ef 100644
--- a/target/riscv/fpu_helper.c
+++ b/target/riscv/fpu_helper.c
@@ -81,6 +81,43 @@ void helper_set_rounding_mode(CPURISCVState *env, uint32_t 
rm)
 set_float_rounding_mode(softrm, >fp_status);
 }
 
+void helper_set_rounding_mode_chkfrm(CPURISCVState *env, uint32_t rm)
+{
+int softrm;
+
+/* Always validate frm, even if rm != DYN. */
+if (unlikely(env->frm >= 5)) {
+riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+}
+if (rm == RISCV_FRM_DYN) {
+rm = env->frm;
+}
+switch (rm) {
+case RISCV_FRM_RNE:
+softrm = float_round_nearest_even;
+break;
+case RISCV_FRM_RTZ:
+softrm = float_round_to_zero;
+break;
+case RISCV_FRM_RDN:
+softrm = float_round_down;
+break;
+case RISCV_FRM_RUP:
+softrm = float_round_up;
+break;
+case RISCV_FRM_RMM:
+softrm = float_round_ties_away;
+break;
+case RISCV_FRM_ROD:
+softrm = float_round_to_odd;
+break;
+default:
+g_assert_not_reached();
+}
+
+set_float_rounding_mode(softrm, >fp_status);
+}
+
 void helper_set_rod_rounding_mode(CPURISCVState *env)
 {
 set_float_rounding_mode(float_round_to_odd, >fp_status);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index df38db7553..493c3815e1 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -114,6 +114,8 @@ typedef struct DisasContext {
 bool pm_base_enabled;
 /* Use icount trigger for native debug */
 bool itrigger;
+/* FRM is known to contain a valid value. */
+bool frm_valid;
 /* TCG of the current insn_start */
 TCGOp *insn_start;
 } DisasContext;
@@ -674,12 +676,29 @@ static void gen_set_rm(DisasContext *ctx, int rm)
 gen_helper_set_rod_rounding_mode(cpu_env);
 return;
 }
+if (rm == RISCV_FRM_DYN) {
+/* The helper will return only if frm valid. */
+ctx->frm_valid = true;
+}
 
 /* The helper may raise ILLEGAL_INSN -- record binv for unwind. */
 decode_save_opc(ctx);
 gen_helper_set_rounding_mode(cpu_env, tcg_constant_i32(rm));
 }
 
+static void gen_set_rm_chkfrm(DisasContext *ctx, int rm)
+{
+if (ctx->frm == rm && ctx->frm_valid) {
+return;
+}
+ctx->frm = rm;
+ctx->frm_valid = true;
+
+/* The helper may raise ILLEGAL_INSN -- record binv for unwind. */
+decode_save_opc(ctx);
+gen_helper_set_rounding_mode_chkfrm(cpu_env, tcg_constant_i32(rm));
+}
+
 static int ex_plus_1(DisasContext *ctx, int nf)
 {
 return nf + 1;
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index d455acedbf..bbb5c3a7b5 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2679,13 +2679,9 @@ static bool do_opfv(DisasContext *s, arg_rmr *a,
 int rm)
 {
 if (checkfn(s, a)) {
-if (rm != RISCV_FRM_DYN) {
-gen_set_rm(s, RISCV_FRM_DYN);
-}
-
 uint32_t data = 0;
 TCGLabel *over = gen_new_label();
-gen_set_rm(s, rm);
+gen_set_rm_chkfrm(s, rm);
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
 tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
@@ -2882,17 +2878,13 @@ static bool opffv_widen_check(DisasContext *s, arg_rmr 
*a)
 static bool trans_##NAME(DisasContext *s, arg_rmr *a)  \
 {  \

[PATCH 0/2] target/riscv: Fix double calls to gen_set_rm [#1411]

2023-01-15 Thread Richard Henderson

These double calls tickle an assertion in decode_save_opc,
and aren't efficient anyway.  Introduce a new helper to do
exactly what was desired.


r~


Richard Henderson (2):
  target/riscv: Introduce helper_set_rounding_mode_chkfrm
  target/riscv: Remove helper_set_rod_rounding_mode

 target/riscv/helper.h   |  2 +-
 target/riscv/fpu_helper.c   | 36 +++--
 target/riscv/translate.c| 21 ---
 target/riscv/insn_trans/trans_rvv.c.inc | 24 +++--
 4 files changed, 57 insertions(+), 26 deletions(-)

-- 
2.34.1

Re: [PATCH v2 0/5] parallels: Add duplication check, repair at open, fix bugs

2023-01-15 Thread Alexander Ivanov

This patchset should be applied on the top of *[PATCH v8 00/11] 
parallels: Refactor the code of images checks and fix a bug*


On 12.01.2023 16:01, Alexander Ivanov wrote:

Fix incorrect data end calculation in parallels_open().

Split image leak handling to separate check and fix helpers.

Add checking and repairing duplicate offsets in BAT

Replace fprintf() by qemu_log().

Image repairing in parallels_open().

v2:
2: Moved two helpers outside parallels_check_leak():
parallels_get_leak_size() and parallels_fix_leak().

3: Used highest_offset() helper in parallels_check_leak(). Fixed a typo.

Added comments. Replaced g_malloc() call by qemu_memalign(). Replaced
bdrv_pread() call by bdrv_co_pread(). Got rid of keeping bytes and
sectors in the same variable. Added setting the bitmap of the used
clusters for a new allocated cluster if it isn't out of the bitmap.
Moved the leak fix to the end of all the checks. Removed a dependence
on image format for the duplicate check.

4 (old): Merged this patch to the previous.

4 (former 5): Fixed formatting.
5 (former 6): Fixed comments. Added O_INACTIVE check in the condition.
   Replaced inuse detection by header_unclean checking.
   Replaced playing with coroutines by bdrv_check() usage.

Alexander Ivanov (5):
   parallels: Incorrect data end calculation in parallels_open()
   parallels: Split image leak handling to separate check and fix helpers
   parallels: Add checking and repairing duplicate offsets in BAT
   parallels: Replace fprintf by qemu_log in check
   parallels: Image repairing in parallels_open()

  block/parallels.c | 321 +++---
  1 file changed, 247 insertions(+), 74 deletions(-)

[PATCH v8 03/11] parallels: Fix data_end after out-of-image check

2023-01-15 Thread Alexander Ivanov

Set data_end to the end of the last cluster inside the image.
In such a way we can be sure that corrupted offsets in the BAT
can't affect the image size.

Signed-off-by: Alexander Ivanov 
Reviewed-by: Denis V. Lunev 
---
 block/parallels.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/parallels.c b/block/parallels.c
index 436b36bbd9..9fe0f33ba9 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -514,6 +514,8 @@ static int coroutine_fn parallels_co_check(BlockDriverState 
*bs,
 }
 }
 
+s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
+
 out:
 qemu_co_mutex_unlock(>lock);
 return ret;
-- 
2.34.1

[PATCH v8 09/11] parallels: Move statistic collection to a separate function

2023-01-15 Thread Alexander Ivanov

We will add more and more checks so we need a better code structure
in parallels_co_check. Let each check be performed in a separate loop
in a separate helper.

Signed-off-by: Alexander Ivanov 
Reviewed-by: Denis V. Lunev 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/parallels.c | 53 +++
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index 5db099b1dd..6e7f140e06 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -526,47 +526,56 @@ static int parallels_check_leak(BlockDriverState *bs,
 return 0;
 }
 
-static int coroutine_fn parallels_co_check(BlockDriverState *bs,
-   BdrvCheckResult *res,
-   BdrvCheckMode fix)
+static void parallels_collect_statistics(BlockDriverState *bs,
+ BdrvCheckResult *res,
+ BdrvCheckMode fix)
 {
 BDRVParallelsState *s = bs->opaque;
-int64_t prev_off;
-int ret;
+int64_t off, prev_off;
 uint32_t i;
 
-qemu_co_mutex_lock(>lock);
-
-parallels_check_unclean(bs, res, fix);
-
-ret = parallels_check_outside_image(bs, res, fix);
-if (ret < 0) {
-goto out;
-}
-
-ret = parallels_check_leak(bs, res, fix);
-if (ret < 0) {
-goto out;
-}
-
 res->bfi.total_clusters = s->bat_size;
 res->bfi.compressed_clusters = 0; /* compression is not supported */
 
 prev_off = 0;
 for (i = 0; i < s->bat_size; i++) {
-int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+off = bat2sect(s, i) << BDRV_SECTOR_BITS;
 if (off == 0) {
 prev_off = 0;
 continue;
 }
 
-res->bfi.allocated_clusters++;
-
 if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
 res->bfi.fragmented_clusters++;
 }
+
 prev_off = off;
+res->bfi.allocated_clusters++;
 }
+}
+
+static int coroutine_fn parallels_co_check(BlockDriverState *bs,
+   BdrvCheckResult *res,
+   BdrvCheckMode fix)
+{
+BDRVParallelsState *s = bs->opaque;
+int ret;
+
+qemu_co_mutex_lock(>lock);
+
+parallels_check_unclean(bs, res, fix);
+
+ret = parallels_check_outside_image(bs, res, fix);
+if (ret < 0) {
+goto out;
+}
+
+ret = parallels_check_leak(bs, res, fix);
+if (ret < 0) {
+goto out;
+}
+
+parallels_collect_statistics(bs, res, fix);
 
 out:
 qemu_co_mutex_unlock(>lock);
-- 
2.34.1

[PATCH v8 01/11] parallels: Out of image offset in BAT leads to image inflation

2023-01-15 Thread Alexander Ivanov

data_end field in BDRVParallelsState is set to the biggest offset present
in BAT. If this offset is outside of the image, any further write
will create the cluster at this offset and/or the image will be truncated
to this offset on close. This is definitely not correct.
Raise an error in parallels_open() if data_end points outside the image
and it is not a check (let the check repair the image).
Set data_end to the end of the cluster with the last correct offset.

Signed-off-by: Alexander Ivanov 
Reviewed-by: Denis V. Lunev 
---
 block/parallels.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/block/parallels.c b/block/parallels.c
index bbea2f2221..4af68adc61 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -732,6 +732,7 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 BDRVParallelsState *s = bs->opaque;
 ParallelsHeader ph;
 int ret, size, i;
+int64_t file_size;
 QemuOpts *opts = NULL;
 Error *local_err = NULL;
 char *buf;
@@ -741,6 +742,12 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 return ret;
 }
 
+file_size = bdrv_getlength(bs->file->bs);
+if (file_size < 0) {
+return -EINVAL;
+}
+file_size >>= BDRV_SECTOR_BITS;
+
 ret = bdrv_pread(bs->file, 0, sizeof(ph), , 0);
 if (ret < 0) {
 goto fail;
@@ -805,6 +812,16 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 
 for (i = 0; i < s->bat_size; i++) {
 int64_t off = bat2sect(s, i);
+if (off >= file_size) {
+if (flags & BDRV_O_CHECK) {
+continue;
+}
+error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry "
+   "is larger than file size (%" PRIi64 ")",
+   off, i, file_size);
+ret = -EINVAL;
+goto fail;
+}
 if (off >= s->data_end) {
 s->data_end = off + s->tracks;
 }
-- 
2.34.1

[PATCH v8 05/11] parallels: Use generic infrastructure for BAT writing in parallels_co_check()

2023-01-15 Thread Alexander Ivanov

BAT is written in the context of conventional operations over
the image inside bdrv_co_flush() when it calls
parallels_co_flush_to_os() callback. Thus we should not
modify BAT array directly, but call parallels_set_bat_entry()
helper and bdrv_co_flush() further on. After that there is no
need to manually write BAT and track its modification.

This makes the code more generic and allows splitting
parallels_set_bat_entry() into independent pieces.

Signed-off-by: Alexander Ivanov 
Reviewed-by: Denis V. Lunev 
---
 block/parallels.c | 23 ++-
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index 2144ecff7d..3ca4ec469b 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -425,9 +425,8 @@ static int coroutine_fn parallels_co_check(BlockDriverState 
*bs,
 {
 BDRVParallelsState *s = bs->opaque;
 int64_t size, prev_off, high_off;
-int ret;
+int ret = 0;
 uint32_t i;
-bool flush_bat = false;
 
 size = bdrv_getlength(bs->file->bs);
 if (size < 0) {
@@ -465,9 +464,8 @@ static int coroutine_fn parallels_co_check(BlockDriverState 
*bs,
 fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
 res->corruptions++;
 if (fix & BDRV_FIX_ERRORS) {
-s->bat_bitmap[i] = 0;
+parallels_set_bat_entry(s, i, 0);
 res->corruptions_fixed++;
-flush_bat = true;
 }
 prev_off = 0;
 continue;
@@ -484,15 +482,6 @@ static int coroutine_fn 
parallels_co_check(BlockDriverState *bs,
 prev_off = off;
 }
 
-ret = 0;
-if (flush_bat) {
-ret = bdrv_co_pwrite_sync(bs->file, 0, s->header_size, s->header, 0);
-if (ret < 0) {
-res->check_errors++;
-goto out;
-}
-}
-
 res->image_end_offset = high_off + s->cluster_size;
 if (size > res->image_end_offset) {
 int64_t count;
@@ -523,6 +512,14 @@ static int coroutine_fn 
parallels_co_check(BlockDriverState *bs,
 
 out:
 qemu_co_mutex_unlock(>lock);
+
+if (ret == 0) {
+ret = bdrv_co_flush(bs);
+if (ret < 0) {
+res->check_errors++;
+}
+}
+
 return ret;
 }
 
-- 
2.34.1

[PATCH v8 10/11] parallels: Replace qemu_co_mutex_lock by WITH_QEMU_LOCK_GUARD

2023-01-15 Thread Alexander Ivanov

Replace the way we use mutex in parallels_co_check() for simpler
and less error-prone code.

Signed-off-by: Alexander Ivanov 
Reviewed-by: Denis V. Lunev 
---
 block/parallels.c | 33 ++---
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index 6e7f140e06..621dbf623a 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -561,30 +561,25 @@ static int coroutine_fn 
parallels_co_check(BlockDriverState *bs,
 BDRVParallelsState *s = bs->opaque;
 int ret;
 
-qemu_co_mutex_lock(>lock);
+WITH_QEMU_LOCK_GUARD(>lock) {
+parallels_check_unclean(bs, res, fix);
 
-parallels_check_unclean(bs, res, fix);
+ret = parallels_check_outside_image(bs, res, fix);
+if (ret < 0) {
+return ret;
+}
 
-ret = parallels_check_outside_image(bs, res, fix);
-if (ret < 0) {
-goto out;
-}
+ret = parallels_check_leak(bs, res, fix);
+if (ret < 0) {
+return ret;
+}
 
-ret = parallels_check_leak(bs, res, fix);
-if (ret < 0) {
-goto out;
+parallels_collect_statistics(bs, res, fix);
 }
 
-parallels_collect_statistics(bs, res, fix);
-
-out:
-qemu_co_mutex_unlock(>lock);
-
-if (ret == 0) {
-ret = bdrv_co_flush(bs);
-if (ret < 0) {
-res->check_errors++;
-}
+ret = bdrv_co_flush(bs);
+if (ret < 0) {
+res->check_errors++;
 }
 
 return ret;
-- 
2.34.1

[PATCH v8 02/11] parallels: Fix high_off calculation in parallels_co_check()

2023-01-15 Thread Alexander Ivanov

Don't let high_off be more than the file size
even if we don't fix the image.

Signed-off-by: Alexander Ivanov 
Reviewed-by: Denis V. Lunev 
---
 block/parallels.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index 4af68adc61..436b36bbd9 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -460,12 +460,12 @@ static int coroutine_fn 
parallels_co_check(BlockDriverState *bs,
 fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
 res->corruptions++;
 if (fix & BDRV_FIX_ERRORS) {
-prev_off = 0;
 s->bat_bitmap[i] = 0;
 res->corruptions_fixed++;
 flush_bat = true;
-continue;
 }
+prev_off = 0;
+continue;
 }
 
 res->bfi.allocated_clusters++;
-- 
2.34.1

[PATCH v8 06/11] parallels: Move check of unclean image to a separate function

2023-01-15 Thread Alexander Ivanov

We will add more and more checks so we need a better code structure
in parallels_co_check. Let each check be performed in a separate loop
in a separate helper.

Signed-off-by: Alexander Ivanov 
Reviewed-by: Denis V. Lunev 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/parallels.c | 31 +--
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index 3ca4ec469b..d48b447cca 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -418,6 +418,25 @@ static coroutine_fn int 
parallels_co_readv(BlockDriverState *bs,
 return ret;
 }
 
+static void parallels_check_unclean(BlockDriverState *bs,
+BdrvCheckResult *res,
+BdrvCheckMode fix)
+{
+BDRVParallelsState *s = bs->opaque;
+
+if (!s->header_unclean) {
+return;
+}
+
+fprintf(stderr, "%s image was not closed correctly\n",
+fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
+res->corruptions++;
+if (fix & BDRV_FIX_ERRORS) {
+/* parallels_close will do the job right */
+res->corruptions_fixed++;
+s->header_unclean = false;
+}
+}
 
 static int coroutine_fn parallels_co_check(BlockDriverState *bs,
BdrvCheckResult *res,
@@ -435,16 +454,8 @@ static int coroutine_fn 
parallels_co_check(BlockDriverState *bs,
 }
 
 qemu_co_mutex_lock(>lock);
-if (s->header_unclean) {
-fprintf(stderr, "%s image was not closed correctly\n",
-fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
-res->corruptions++;
-if (fix & BDRV_FIX_ERRORS) {
-/* parallels_close will do the job right */
-res->corruptions_fixed++;
-s->header_unclean = false;
-}
-}
+
+parallels_check_unclean(bs, res, fix);
 
 res->bfi.total_clusters = s->bat_size;
 res->bfi.compressed_clusters = 0; /* compression is not supported */
-- 
2.34.1

[PATCH v8 11/11] parallels: Incorrect condition in out-of-image check

2023-01-15 Thread Alexander Ivanov

All the offsets in the BAT must be lower than the file size.
Fix the check condition for correct check.

Signed-off-by: Alexander Ivanov 
Reviewed-by: Denis V. Lunev 
---
 block/parallels.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/parallels.c b/block/parallels.c
index 621dbf623a..eda3fb558d 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -455,7 +455,7 @@ static int parallels_check_outside_image(BlockDriverState 
*bs,
 high_off = 0;
 for (i = 0; i < s->bat_size; i++) {
 off = bat2sect(s, i) << BDRV_SECTOR_BITS;
-if (off > size) {
+if (off >= size) {
 fprintf(stderr, "%s cluster %u is outside image\n",
 fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
 res->corruptions++;
-- 
2.34.1

[PATCH v8 08/11] parallels: Move check of leaks to a separate function

2023-01-15 Thread Alexander Ivanov

We will add more and more checks so we need a better code structure
in parallels_co_check. Let each check be performed in a separate loop
in a separate helper.

Signed-off-by: Alexander Ivanov 
Reviewed-by: Denis V. Lunev 
---
 block/parallels.c | 84 +--
 1 file changed, 52 insertions(+), 32 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index 3d06623355..5db099b1dd 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -475,14 +475,14 @@ static int parallels_check_outside_image(BlockDriverState 
*bs,
 return 0;
 }
 
-static int coroutine_fn parallels_co_check(BlockDriverState *bs,
-   BdrvCheckResult *res,
-   BdrvCheckMode fix)
+static int parallels_check_leak(BlockDriverState *bs,
+BdrvCheckResult *res,
+BdrvCheckMode fix)
 {
 BDRVParallelsState *s = bs->opaque;
-int64_t size, prev_off, high_off;
-int ret;
+int64_t size, off, high_off, count;
 uint32_t i;
+int ret;
 
 size = bdrv_getlength(bs->file->bs);
 if (size < 0) {
@@ -490,41 +490,16 @@ static int coroutine_fn 
parallels_co_check(BlockDriverState *bs,
 return size;
 }
 
-qemu_co_mutex_lock(>lock);
-
-parallels_check_unclean(bs, res, fix);
-
-ret = parallels_check_outside_image(bs, res, fix);
-if (ret < 0) {
-goto out;
-}
-
-res->bfi.total_clusters = s->bat_size;
-res->bfi.compressed_clusters = 0; /* compression is not supported */
-
 high_off = 0;
-prev_off = 0;
 for (i = 0; i < s->bat_size; i++) {
-int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
-if (off == 0) {
-prev_off = 0;
-continue;
-}
-
-res->bfi.allocated_clusters++;
+off = bat2sect(s, i) << BDRV_SECTOR_BITS;
 if (off > high_off) {
 high_off = off;
 }
-
-if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
-res->bfi.fragmented_clusters++;
-}
-prev_off = off;
 }
 
 res->image_end_offset = high_off + s->cluster_size;
 if (size > res->image_end_offset) {
-int64_t count;
 count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
 fprintf(stderr, "%s space leaked at the end of the image %" PRId64 
"\n",
 fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
@@ -542,12 +517,57 @@ static int coroutine_fn 
parallels_co_check(BlockDriverState *bs,
 if (ret < 0) {
 error_report_err(local_err);
 res->check_errors++;
-goto out;
+return ret;
 }
 res->leaks_fixed += count;
 }
 }
 
+return 0;
+}
+
+static int coroutine_fn parallels_co_check(BlockDriverState *bs,
+   BdrvCheckResult *res,
+   BdrvCheckMode fix)
+{
+BDRVParallelsState *s = bs->opaque;
+int64_t prev_off;
+int ret;
+uint32_t i;
+
+qemu_co_mutex_lock(>lock);
+
+parallels_check_unclean(bs, res, fix);
+
+ret = parallels_check_outside_image(bs, res, fix);
+if (ret < 0) {
+goto out;
+}
+
+ret = parallels_check_leak(bs, res, fix);
+if (ret < 0) {
+goto out;
+}
+
+res->bfi.total_clusters = s->bat_size;
+res->bfi.compressed_clusters = 0; /* compression is not supported */
+
+prev_off = 0;
+for (i = 0; i < s->bat_size; i++) {
+int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+if (off == 0) {
+prev_off = 0;
+continue;
+}
+
+res->bfi.allocated_clusters++;
+
+if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
+res->bfi.fragmented_clusters++;
+}
+prev_off = off;
+}
+
 out:
 qemu_co_mutex_unlock(>lock);
 
-- 
2.34.1

[PATCH v8 07/11] parallels: Move check of cluster outside image to a separate function

2023-01-15 Thread Alexander Ivanov

We will add more and more checks so we need a better code structure
in parallels_co_check. Let each check be performed in a separate loop
in a separate helper.

Signed-off-by: Alexander Ivanov 
Reviewed-by: Denis V. Lunev 
---
 block/parallels.c | 59 ++-
 1 file changed, 43 insertions(+), 16 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index d48b447cca..3d06623355 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -438,13 +438,50 @@ static void parallels_check_unclean(BlockDriverState *bs,
 }
 }
 
+static int parallels_check_outside_image(BlockDriverState *bs,
+ BdrvCheckResult *res,
+ BdrvCheckMode fix)
+{
+BDRVParallelsState *s = bs->opaque;
+uint32_t i;
+int64_t off, high_off, size;
+
+size = bdrv_getlength(bs->file->bs);
+if (size < 0) {
+res->check_errors++;
+return size;
+}
+
+high_off = 0;
+for (i = 0; i < s->bat_size; i++) {
+off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+if (off > size) {
+fprintf(stderr, "%s cluster %u is outside image\n",
+fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
+res->corruptions++;
+if (fix & BDRV_FIX_ERRORS) {
+parallels_set_bat_entry(s, i, 0);
+res->corruptions_fixed++;
+}
+continue;
+}
+if (high_off < off) {
+high_off = off;
+}
+}
+
+s->data_end = (high_off + s->cluster_size) >> BDRV_SECTOR_BITS;
+
+return 0;
+}
+
 static int coroutine_fn parallels_co_check(BlockDriverState *bs,
BdrvCheckResult *res,
BdrvCheckMode fix)
 {
 BDRVParallelsState *s = bs->opaque;
 int64_t size, prev_off, high_off;
-int ret = 0;
+int ret;
 uint32_t i;
 
 size = bdrv_getlength(bs->file->bs);
@@ -457,6 +494,11 @@ static int coroutine_fn 
parallels_co_check(BlockDriverState *bs,
 
 parallels_check_unclean(bs, res, fix);
 
+ret = parallels_check_outside_image(bs, res, fix);
+if (ret < 0) {
+goto out;
+}
+
 res->bfi.total_clusters = s->bat_size;
 res->bfi.compressed_clusters = 0; /* compression is not supported */
 
@@ -469,19 +511,6 @@ static int coroutine_fn 
parallels_co_check(BlockDriverState *bs,
 continue;
 }
 
-/* cluster outside the image */
-if (off > size) {
-fprintf(stderr, "%s cluster %u is outside image\n",
-fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
-res->corruptions++;
-if (fix & BDRV_FIX_ERRORS) {
-parallels_set_bat_entry(s, i, 0);
-res->corruptions_fixed++;
-}
-prev_off = 0;
-continue;
-}
-
 res->bfi.allocated_clusters++;
 if (off > high_off) {
 high_off = off;
@@ -519,8 +548,6 @@ static int coroutine_fn parallels_co_check(BlockDriverState 
*bs,
 }
 }
 
-s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
-
 out:
 qemu_co_mutex_unlock(>lock);
 
-- 
2.34.1

[PATCH v8 00/11] parallels: Refactor the code of images checks and fix a bug

2023-01-15 Thread Alexander Ivanov

Fix image inflation when offset in BAT is out of image.

Replace whole BAT syncing by flushing only dirty blocks.

Move all the checks outside the main check function into
separate functions.

Use WITH_QEMU_LOCK_GUARD for simpler code.

Fix incorrect condition in out-of-image check.

v8: Rebase on the top of the current master branch.

v7:
1,2: Fix string lengths in the commit messages.
3: Fix a typo in the commit message.

v6:
1: Move the error check inside the loop. Move file size getting
   to the function beginning. Skip out-of-image offsets.
2: A new patch - don't let high_off be more than the end of the last cluster.
3: Set data_end without any condition.
7: Move data_end setting to parallels_check_outside_image().
8: Remove s->data_end setting from parallels_check_leak().
   Fix 'i' type.

v5:
2: Change the way of data_end fixing.
6,7: Move data_end check to parallels_check_leak().

v4:
1: Move s->data_end fix to parallels_co_check(). Split the check
   in parallels_open() and the fix in parallels_co_check() to two patches.
2: A new patch - a part of the patch 1.
   Add a fix for data_end to parallels_co_check().
3: Move offset conversion to parallels_set_bat_entry().
4: Fix 'ret' rewriting by bdrv_co_flush() results.
7: Keep 'i' as uint32_t.

v3:

1-8: Fix commit message.

v2:

2: A new patch - a part of the split patch 2.
3: Patch order was changed so the replacement is done in parallels_co_check.
   Now we use a helper to set BAT entry and mark the block dirty.
4: Revert the condition with s->header_unclean.
5: Move unrelated helper parallels_set_bat_entry creation to a separate patch.
7: Move fragmentation counting code to this function too.
8: Fix an incorrect usage of WITH_QEMU_LOCK_GUARD.

Alexander Ivanov (11):
  parallels: Out of image offset in BAT leads to image inflation
  parallels: Fix high_off calculation in parallels_co_check()
  parallels: Fix data_end after out-of-image check
  parallels: create parallels_set_bat_entry_helper() to assign BAT value
  parallels: Use generic infrastructure for BAT writing in
parallels_co_check()
  parallels: Move check of unclean image to a separate function
  parallels: Move check of cluster outside image to a separate function
  parallels: Move check of leaks to a separate function
  parallels: Move statistic collection to a separate function
  parallels: Replace qemu_co_mutex_lock by WITH_QEMU_LOCK_GUARD
  parallels: Incorrect condition in out-of-image check

 block/parallels.c | 195 +-
 1 file changed, 139 insertions(+), 56 deletions(-)

-- 
2.34.1

[PATCH v8 04/11] parallels: create parallels_set_bat_entry_helper() to assign BAT value

2023-01-15 Thread Alexander Ivanov

This helper will be reused in next patches during parallels_co_check
rework to simplify its code.

Signed-off-by: Alexander Ivanov 
Reviewed-by: Denis V. Lunev 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/parallels.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index 9fe0f33ba9..2144ecff7d 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -165,6 +165,13 @@ static int64_t block_status(BDRVParallelsState *s, int64_t 
sector_num,
 return start_off;
 }
 
+static void parallels_set_bat_entry(BDRVParallelsState *s,
+uint32_t index, uint32_t offset)
+{
+s->bat_bitmap[index] = cpu_to_le32(offset);
+bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 
1);
+}
+
 static coroutine_fn int64_t allocate_clusters(BlockDriverState *bs,
   int64_t sector_num,
   int nb_sectors, int *pnum)
@@ -251,10 +258,8 @@ static coroutine_fn int64_t 
allocate_clusters(BlockDriverState *bs,
 }
 
 for (i = 0; i < to_allocate; i++) {
-s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
+parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier);
 s->data_end += s->tracks;
-bitmap_set(s->bat_dirty_bmap,
-   bat_entry_off(idx + i) / s->bat_dirty_block, 1);
 }
 
 return bat2sect(s, idx) + sector_num % s->tracks;
-- 
2.34.1

Re: [PATCH 1/9] audio: don't check qemu_add_vm_change_state_handler failure

2023-01-15 Thread Volker Rümelin


Am 13.01.23 um 17:21 schrieb Daniel P. Berrangé:

This function cannot fail since g_malloc0 aborts on OOM.

Signed-off-by: Daniel P. Berrangé 
---
  audio/audio.c | 7 +--
  1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index d849a94a81..7b4b957945 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1712,7 +1712,6 @@ static AudioState *audio_init(Audiodev *dev, const char 
*name)
  size_t i;
  int done = 0;
  const char *drvname = NULL;
-VMChangeStateEntry *e;
  AudioState *s;
  struct audio_driver *driver;
  /* silence gcc warning about uninitialized variable */
@@ -1830,11 +1829,7 @@ static AudioState *audio_init(Audiodev *dev, const char 
*name)
  s->period_ticks = dev->timer_period * (int64_t)SCALE_US;
  }
  
-e = qemu_add_vm_change_state_handler (audio_vm_change_state_handler, s);

-if (!e) {
-dolog ("warning: Could not register change state handler\n"
-   "(Audio can continue looping even after stopping the VM)\n");
-}
+qemu_add_vm_change_state_handler (audio_vm_change_state_handler, s);


checkpatch.pl doesn't work properly here. It should report:
ERROR: space prohibited between function name and open parenthesis '('

With this changed,

Reviewed-by: Volker Rümelin 

  
  QTAILQ_INSERT_TAIL(&audio_states, s, list);

  QLIST_INIT (&s->card_head);

Re: [PATCH v2] Windows installer: keep dependency cache

2023-01-15 Thread Arthur Sengileyev

Will resubmit v3 fixing this. Sorry for this. This is my first time using
the mailing list to submit changes.

Regards,
Arthur

On Sun, Jan 15, 2023 at 5:16 PM Bin Meng  wrote:

> On Sun, Jan 15, 2023 at 2:40 AM Arthur Sengileyev
>  wrote:
> >
> > It should be possible to reuse cache built by previous iteration
> > to process next executables. Processed dependencies are already
> > skipped later based on dll name.
> >
> > Changes for v2:
> > (1) changed variable name
> > (2) changed wording in description
>
> The changelog should not be in the commit message.
>
> >
> > Signed-off-by: Arthur Sengileyev 
> > Reviewed-by: Bin Meng 
> > ---
> >  scripts/nsis.py | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/scripts/nsis.py b/scripts/nsis.py
> > index 03ed7608a2..7cffba70ff 100644
> > --- a/scripts/nsis.py
> > +++ b/scripts/nsis.py
> > @@ -91,12 +91,13 @@ def main():
> >  print("Searching '%s' for the dependent dlls ..." % search_path)
> >  dlldir = os.path.join(destdir + prefix, "dll")
> >  os.mkdir(dlldir)
> > +deps_cache = set()
> >
> >  for exe in glob.glob(os.path.join(destdir + prefix, "*.exe")):
> >  signcode(exe)
> >
> >  # find all dll dependencies
> > -deps = set(find_deps(exe, search_path, set()))
> > +deps = set(find_deps(exe, search_path, deps_cache))
> >  deps.remove(exe)
> >
> >  # copy all dlls to the DLLDIR
> > --
>
> Regards,
> Bin
>

Re: [PATCH v2] Windows installer: keep dependency cache

2023-01-15 Thread Bin Meng

On Sun, Jan 15, 2023 at 2:40 AM Arthur Sengileyev
 wrote:
>
> It should be possible to reuse cache built by previous iteration
> to process next executables. Processed dependencies are already
> skipped later based on dll name.
>
> Changes for v2:
> (1) changed variable name
> (2) changed wording in description

The changelog should not be in the commit message.

>
> Signed-off-by: Arthur Sengileyev 
> Reviewed-by: Bin Meng 
> ---
>  scripts/nsis.py | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/scripts/nsis.py b/scripts/nsis.py
> index 03ed7608a2..7cffba70ff 100644
> --- a/scripts/nsis.py
> +++ b/scripts/nsis.py
> @@ -91,12 +91,13 @@ def main():
>  print("Searching '%s' for the dependent dlls ..." % search_path)
>  dlldir = os.path.join(destdir + prefix, "dll")
>  os.mkdir(dlldir)
> +deps_cache = set()
>
>  for exe in glob.glob(os.path.join(destdir + prefix, "*.exe")):
>  signcode(exe)
>
>  # find all dll dependencies
> -deps = set(find_deps(exe, search_path, set()))
> +deps = set(find_deps(exe, search_path, deps_cache))
>  deps.remove(exe)
>
>  # copy all dlls to the DLLDIR
> --

Regards,
Bin

Re: [PATCH 2/9] audio: remove special audio_calloc function

2023-01-15 Thread Volker Rümelin


Am 13.01.23 um 17:21 schrieb Daniel P. Berrangé:

The audio_calloc function does various checks on the size and
nmembers parameters to detect various error conditions. There
are only 5 callers

  * alsa_poll_helper: the pollfd count is small and bounded,
  * audio_pcm_create_voice_pair_: allocating a single fixed
size struct
  * audio_pcm_sw_alloc_resources_: samples could be negative,
zero, or overflow, so needs a check
  * audio_pcm_hw_add_new_: voice size could be zero for
backends that don't support audio input
  * st_rate_start: allocating a single fixed size struct

IOW, only two of the callers need special error checks and
it is clearer if their respective checks are inlined. Thus
audio_calloc can be eliminated.


Hi Daniel,

my patch series at 
https://lists.nongnu.org/archive/html/qemu-devel/2022-12/msg02895.html 
also removes audio_calloc(). There will be merge conflicts.


With best regards,
Volker



Signed-off-by: Daniel P. Berrangé 
---
  audio/alsaaudio.c|  6 +-
  audio/audio.c| 20 
  audio/audio_int.h|  1 -
  audio/audio_template.h   | 28 ++--
  audio/mixeng.c   |  7 +--
  tests/qtest/fuzz-sb16-test.c |  6 --
  6 files changed, 20 insertions(+), 48 deletions(-)

diff --git a/audio/alsaaudio.c b/audio/alsaaudio.c
index 714bfb6453..5f50dfa0bf 100644
--- a/audio/alsaaudio.c
+++ b/audio/alsaaudio.c
@@ -222,11 +222,7 @@ static int alsa_poll_helper (snd_pcm_t *handle, struct 
pollhlp *hlp, int mask)
  return -1;
  }
  
-pfds = audio_calloc ("alsa_poll_helper", count, sizeof (*pfds));

-if (!pfds) {
-dolog ("Could not initialize poll mode\n");
-return -1;
-}
+pfds = g_new0(struct pollfd, count);
  
  err = snd_pcm_poll_descriptors (handle, pfds, count);

  if (err < 0) {
diff --git a/audio/audio.c b/audio/audio.c
index 7b4b957945..f397072a1f 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -146,26 +146,6 @@ static inline int audio_bits_to_index (int bits)
  }
  }
  
-void *audio_calloc (const char *funcname, int nmemb, size_t size)

-{
-int cond;
-size_t len;
-
-len = nmemb * size;
-cond = !nmemb || !size;
-cond |= nmemb < 0;
-cond |= len < size;
-
-if (audio_bug ("audio_calloc", cond)) {
-AUD_log (NULL, "%s passed invalid arguments to audio_calloc\n",
- funcname);
-AUD_log (NULL, "nmemb=%d size=%zu (len=%zu)\n", nmemb, size, len);
-return NULL;
-}
-
-return g_malloc0 (len);
-}
-
  void AUD_vlog (const char *cap, const char *fmt, va_list ap)
  {
  if (cap) {
diff --git a/audio/audio_int.h b/audio/audio_int.h
index e87ce014a0..b0cc2cd390 100644
--- a/audio/audio_int.h
+++ b/audio/audio_int.h
@@ -251,7 +251,6 @@ void audio_pcm_init_info (struct audio_pcm_info *info, 
struct audsettings *as);
  void audio_pcm_info_clear_buf (struct audio_pcm_info *info, void *buf, int 
len);
  
  int audio_bug (const char *funcname, int cond);

-void *audio_calloc (const char *funcname, int nmemb, size_t size);
  
  void audio_run(AudioState *s, const char *msg);
  
diff --git a/audio/audio_template.h b/audio/audio_template.h

index 720a32e57e..564cbb1f01 100644
--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@@ -116,13 +116,20 @@ static int glue (audio_pcm_sw_alloc_resources_, TYPE) (SW 
*sw)
  samples = (int64_t)sw->HWBUF->size * sw->ratio >> 32;
  #endif
  
-sw->buf = audio_calloc(__func__, samples, sizeof(struct st_sample));

-if (!sw->buf) {
-dolog ("Could not allocate buffer for `%s' (%d samples)\n",
+if (audio_bug(__func__, samples <= 0)) {
+dolog ("Could not allocate buffer for '%s', samples %d <= 0\n",
 SW_NAME (sw), samples);
  return -1;
  }
  
+if (audio_bug(__func__, (SIZE_MAX / sizeof(struct st_sample) < samples))) {

+dolog ("Could not allocate buffer for '%s', samples %d overflows\n",
+   SW_NAME (sw), samples);
+return -1;
+}
+
+sw->buf = g_new0(struct st_sample, samples);
+
  #ifdef DAC
  sw->rate = st_rate_start (sw->info.freq, sw->hw->info.freq);
  #else
@@ -264,13 +271,12 @@ static HW *glue(audio_pcm_hw_add_new_, TYPE)(AudioState 
*s,
  return NULL;
  }
  
-hw = audio_calloc(__func__, 1, glue(drv->voice_size_, TYPE));

-if (!hw) {
-dolog ("Can not allocate voice `%s' size %d\n",
-   drv->name, glue (drv->voice_size_, TYPE));
+if (audio_bug(__func__, glue(drv->voice_size_, TYPE) == 0)) {
+dolog ("Voice size is zero");
  return NULL;
  }
  
+hw = g_malloc0(glue(drv->voice_size_, TYPE));

  hw->s = s;
  hw->pcm_ops = drv->pcm_ops;
  
@@ -398,12 +404,7 @@ static SW *glue(audio_pcm_create_voice_pair_, TYPE)(

  hw_as = *as;
  }
  
-sw = audio_calloc(__func__, 1, sizeof(*sw));

-if (!sw) {
-dolog ("Could not allocate soft voice

Re: [PATCH 00/17] audio: improve callback interface for audio frontends

2023-01-15 Thread Volker Rümelin


Am 15.01.23 um 14:08 schrieb Volker Rümelin:

Ccing a few more people who might be interested in this patch series.

@Mark:
After this patch series, the code in your out of tree ASC audio device 
(and a few in tree audio devices) could be simplified. write_audio() and 
the loops calling write_audio() could be removed.


With best regards,
Volker


Based-on: <3b1404eb-a7c5-f64c-3e47-1397c54c4...@t-online.de>
([PATCH 00/11] audio: more improvements)

The callback interface for emulated audio devices is strange. The 
callback function has an 'avail' parameter that passes the number of 
bytes that can be written or read. Unfortunately, this value sometimes 
is only an imprecise estimate and the callback functions must check 
the actual bytes written or read. For playback devices, this means 
that they either need a ring buffer or have to write the unwritten 
bytes again the next time. For recording devices, things are a bit 
easier. They only need to continue with the actual number of bytes read.


After this patch series, the 'avail' argument for the -audiodev 
out.mixing-engine=on and in.mixing-engine=on cases is exact. Audio 
frontends only need a linear frame buffer and there's a guarantee they 
can write or read 'avail' bytes.


The -audiodev out.mixing-engine=off case is also mostly accurate. Only 
the D-Bus audio backend is still missing a required function. The 
-audiodev in.mixing-engine=off case always passes a much too large 
'avail' value. I haven't worked on this yet, because there was no 
reason for it so far.


The following logs show the improvements. Not only the audio frontends 
can write or read all needed or available bytes. The same is true for 
the audio backends. For playback, the first six lines in the logs are 
expected. Here you can see how quickly the guest fills the empty 
downstream buffers after playback starts.


QEMU was started with -device ich9-intel-hda,addr=0x1b -device 
hda-duplex,audiodev=audio0 -audiodev 
pa,out.frequency=96000,in.frequency=96000,id=audio0


playback guest 44100Hz => host 96000Hz

unpatched version:
hda_audio_output_cb: to write 8188, written 1704
audio_run_out: free 4458, played 926
hda_audio_output_cb: to write 6488, written 2384
audio_run_out: free 3532, played 1297
hda_audio_output_cb: to write 4104, written 2648
audio_run_out: free 2235, played 1441
audio_run_out: free 794, played 793
audio_run_out: free 897, played 896
audio_run_out: free 831, played 829
...
hda_audio_output_cb: could not write 4 bytes
hda_audio_output_cb: to write 1764, written 1760
audio_run_out: free 960, played 958
...

patched version:
hda_audio_output_cb: to write 8192, written 1620
audio_run_out: free 4458, played 880
hda_audio_output_cb: to write 6576, written 2508
audio_run_out: free 3578, played 1365
hda_audio_output_cb: to write 4068, written 2500
audio_run_out: free 2213, played 1360

record host 96000Hz => guest 44100Hz

unpatched version:
audio_run_in: avail 4458, acquired 4454
audio_run_in: avail 1574, acquired 1572
audio_run_in: avail 766, acquired 764
audio_run_in: avail 1052, acquired 1051
audio_run_in: avail 761, acquired 760
audio_run_in: avail 1123, acquired 1121
...
hda_audio_input_cb: could not read 4 bytes
hda_audio_input_cb: to read 1988, read 1984
audio_run_in: avail 1082, acquired 1080
...

patched version:
(no output)

QEMU was started with -device ich9-intel-hda,addr=0x1b -device 
hda-duplex,audiodev=audio0 -audiodev 
pa,out.frequency=32000,in.frequency=32000,id=audio0


playback guest 44100Hz => host 32000Hz

unpatched version:
hda_audio_output_cb: to write 8188, written 1620
audio_run_out: free 1486, played 294
hda_audio_output_cb: to write 6568, written 2512
audio_run_out: free 1192, played 455
hda_audio_output_cb: to write 4060, written 2504
audio_run_out: free 737, played 455
audio_run_out: free 282, played 281
audio_run_out: free 357, played 356
audio_run_out: free 314, played 313
...
hda_audio_output_cb: could not write 4 bytes
hda_audio_output_cb: to write 1416, written 1412
audio_run_out: free 257, played 256
...

patched version:
hda_audio_output_cb: to write 8192, written 1656
audio_run_out: free 1486, played 300
hda_audio_output_cb: to write 6536, written 2516
audio_run_out: free 1186, played 457
hda_audio_output_cb: to write 4020, written 2540
audio_run_out: free 729, played 460

record host 32000Hz => guest 44100Hz

unpatched version:
audio_run_in: avail 1486, acquired 1485
audio_run_in: avail 272, acquired 271
audio_run_in: avail 366, acquired 365
hda_audio_input_cb: could not read 4 bytes
hda_audio_input_cb: to read 1420, read 1416
audio_run_in: avail 258, acquired 257
audio_run_in: avail 375, acquired 374
hda_audio_input_cb: could not read 4 bytes
hda_audio_input_cb: to read 2056, read 2052
audio_run_in: avail 260, acquired 259
...

patched version:
(no output)

This is the debug code for the logs above.

---snip--
--- a/audio/audio.c    2022-12-13 19:14:31.793153558 +0100
+++ b/audio/audio.c    2022-12-11 16:24:48.842649711 +0100
@@

[PATCH 09/17] audio/mixeng: calculate number of input frames

2023-01-15 Thread Volker Rümelin

From: Volker Rümelin 

Calculate the exact number of audio input frames needed to get
a given number of audio output frames. The exact number of
frames depends only on the difference of opos - ipos and the
number of output frames. When downsampling, this function
returns the maximum number of input frames needed.

This function will later replace the audio_frontend_frames_out()
function, which calculates the average number of input frames
rounded down to the nearest integer.

Signed-off-by: Volker Rümelin 
---
 audio/mixeng.c | 36 
 audio/mixeng.h |  1 +
 2 files changed, 37 insertions(+)

diff --git a/audio/mixeng.c b/audio/mixeng.c
index fe454e0725..6bb3d54f77 100644
--- a/audio/mixeng.c
+++ b/audio/mixeng.c
@@ -440,6 +440,42 @@ void st_rate_stop (void *opaque)
 g_free (opaque);
 }
 
+/**
+ * st_rate_frames_in() - returns the number of frames needed to
+ * get frames_out frames after resampling
+ *
+ * @opaque: pointer to struct rate
+ * @frames_out: number of frames
+ */
+uint32_t st_rate_frames_in(void *opaque, uint32_t frames_out)
+{
+struct rate *rate = opaque;
+uint64_t opos_start, opos_end;
+uint32_t ipos_start, ipos_end;
+
+if (rate->opos_inc == 1ULL << 32) {
+return frames_out;
+}
+
+if (frames_out) {
+opos_start = rate->opos;
+ipos_start = rate->ipos;
+} else {
+uint64_t offset;
+
+/* add offset = ceil(opos_inc) to opos and ipos to avoid an underflow 
*/
+offset = (rate->opos_inc + (1ULL << 32) - 1) & ~((1ULL << 32) - 1);
+opos_start = rate->opos + offset;
+ipos_start = rate->ipos + (offset >> 32);
+}
+/* last frame written was at opos_start - rate->opos_inc */
+opos_end = opos_start - rate->opos_inc + rate->opos_inc * frames_out;
+ipos_end = (opos_end >> 32) + 1;
+
+/* last frame read was at ipos_start - 1 */
+return ipos_end + 1 > ipos_start ? ipos_end + 1 - ipos_start : 0;
+}
+
 void mixeng_clear (struct st_sample *buf, int len)
 {
 memset (buf, 0, len * sizeof (struct st_sample));
diff --git a/audio/mixeng.h b/audio/mixeng.h
index 2dcd6df245..64c1e231cc 100644
--- a/audio/mixeng.h
+++ b/audio/mixeng.h
@@ -52,6 +52,7 @@ void st_rate_flow(void *opaque, st_sample *ibuf, st_sample 
*obuf,
 void st_rate_flow_mix(void *opaque, st_sample *ibuf, st_sample *obuf,
   size_t *isamp, size_t *osamp);
 void st_rate_stop (void *opaque);
+uint32_t st_rate_frames_in(void *opaque, uint32_t frames_out);
 void mixeng_clear (struct st_sample *buf, int len);
 void mixeng_volume (struct st_sample *buf, int len, struct mixeng_volume *vol);
 
-- 
2.35.3

[PATCH 15/17] audio: handle leftover audio frame from upsampling

2023-01-15 Thread Volker Rümelin

From: Volker Rümelin 

Upsampling may leave one remaining audio frame in the input
buffer. The emulated audio playback devices are currently
responsible to write this audio frame again in the next write
cycle. Push that task down to audio_pcm_sw_write.

This is another step towards an audio callback interface that
guarantees that when audio frontends are told they can write
n audio frames, they can actually do so.

Signed-off-by: Volker Rümelin 
---
 audio/audio.c  | 34 --
 audio/audio_template.h |  5 +++--
 2 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index ecd5d31260..b846b89a27 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -729,16 +729,21 @@ static size_t audio_pcm_sw_write(SWVoiceOut *sw, void 
*buf, size_t buf_len)
 hw_free = hw_free > live ? hw_free - live : 0;
 frames_out_max = MIN(dead, hw_free);
 sw_max = st_rate_frames_in(sw->rate, frames_out_max);
-fe_max = MIN(buf_len / sw->info.bytes_per_frame, sw->resample_buf.size);
+fe_max = MIN(buf_len / sw->info.bytes_per_frame + sw->resample_buf.pos,
+ sw->resample_buf.size);
 frames_in_max = MIN(sw_max, fe_max);
 
 if (!frames_in_max) {
 return 0;
 }
 
-sw->conv(sw->resample_buf.buffer, buf, frames_in_max);
-if (!sw->hw->pcm_ops->volume_out) {
-mixeng_volume(sw->resample_buf.buffer, frames_in_max, &sw->vol);
+if (frames_in_max > sw->resample_buf.pos) {
+sw->conv(sw->resample_buf.buffer + sw->resample_buf.pos,
+ buf, frames_in_max - sw->resample_buf.pos);
+if (!sw->hw->pcm_ops->volume_out) {
+mixeng_volume(sw->resample_buf.buffer + sw->resample_buf.pos,
+  frames_in_max - sw->resample_buf.pos, &sw->vol);
+}
 }
 
 audio_pcm_sw_resample_out(sw, frames_in_max, frames_out_max,
@@ -747,6 +752,22 @@ static size_t audio_pcm_sw_write(SWVoiceOut *sw, void 
*buf, size_t buf_len)
 sw->total_hw_samples_mixed += total_out;
 sw->empty = sw->total_hw_samples_mixed == 0;
 
+/*
+ * Upsampling may leave one audio frame in the resample buffer. Decrement
+ * total_in by one if there was a leftover frame from the previous resample
+ * pass in the resample buffer. Increment total_in by one if the current
+ * resample pass left one frame in the resample buffer.
+ */
+if (frames_in_max - total_in == 1) {
+/* copy one leftover audio frame to the beginning of the buffer */
+*sw->resample_buf.buffer = *(sw->resample_buf.buffer + total_in);
+total_in += 1 - sw->resample_buf.pos;
+sw->resample_buf.pos = 1;
+} else if (total_in >= sw->resample_buf.pos) {
+total_in -= sw->resample_buf.pos;
+sw->resample_buf.pos = 0;
+}
+
 #ifdef DEBUG_OUT
 dolog (
 "%s: write size %zu written %zu total mixed %zu\n",
@@ -1153,8 +1174,9 @@ static void audio_run_out (AudioState *s)
 } else {
 free = 0;
 }
-if (free > 0) {
-free = MIN(free, sw->resample_buf.size);
+if (free > sw->resample_buf.pos) {
+free = MIN(free, sw->resample_buf.size)
+   - sw->resample_buf.pos;
 sw->callback.fn(sw->callback.opaque,
 free * sw->info.bytes_per_frame);
 }
diff --git a/audio/audio_template.h b/audio/audio_template.h
index 07c14e7821..a9a550a3b7 100644
--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@@ -132,8 +132,9 @@ static int glue (audio_pcm_sw_alloc_resources_, TYPE) (SW 
*sw)
 return -1;
 }
 
-sw->resample_buf.buffer = g_new0(st_sample, samples);
-sw->resample_buf.size = samples;
+/* allocate one additional audio frame that is needed for upsampling */
+sw->resample_buf.buffer = g_new0(st_sample, samples + 1);
+sw->resample_buf.size = samples + 1;
 sw->resample_buf.pos = 0;
 
 #ifdef DAC
-- 
2.35.3

[PATCH 02/17] audio: change type and name of the resample buffer

2023-01-15 Thread Volker Rümelin

From: Volker Rümelin 

Change the type of the resample buffer from struct st_sample *
to STSampleBuffer. Also change the name from buf to resample_buf
for better readability.

The new variables resample_buf.size and resample_buf.pos will be
used after the next patches. There is no functional change.

Signed-off-by: Volker Rümelin 
---
 audio/audio.c  | 15 ---
 audio/audio_int.h  |  4 ++--
 audio/audio_template.h | 10 ++
 3 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index 6a17b3bb2f..22ec7d3093 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -553,7 +553,7 @@ static size_t audio_pcm_sw_read(SWVoiceIn *sw, void *buf, 
size_t size)
 {
 HWVoiceIn *hw = sw->hw;
 size_t samples, live, ret = 0, swlim, isamp, osamp, rpos, total = 0;
-struct st_sample *src, *dst = sw->buf;
+struct st_sample *src, *dst = sw->resample_buf.buffer;
 
 live = hw->total_samples_captured - sw->total_hw_samples_acquired;
 if (!live) {
@@ -593,10 +593,10 @@ static size_t audio_pcm_sw_read(SWVoiceIn *sw, void *buf, 
size_t size)
 }
 
 if (!hw->pcm_ops->volume_in) {
-mixeng_volume (sw->buf, ret, &sw->vol);
+mixeng_volume(sw->resample_buf.buffer, ret, &sw->vol);
 }
 
-sw->clip (buf, sw->buf, ret);
+sw->clip(buf, sw->resample_buf.buffer, ret);
 sw->total_hw_samples_acquired += total;
 return ret * sw->info.bytes_per_frame;
 }
@@ -704,10 +704,10 @@ static size_t audio_pcm_sw_write(SWVoiceOut *sw, void 
*buf, size_t size)
 samples = ((int64_t)MIN(dead, hw_free) << 32) / sw->ratio;
 samples = MIN(samples, size / sw->info.bytes_per_frame);
 if (samples) {
-sw->conv(sw->buf, buf, samples);
+sw->conv(sw->resample_buf.buffer, buf, samples);
 
 if (!sw->hw->pcm_ops->volume_out) {
-mixeng_volume(sw->buf, samples, &sw->vol);
+mixeng_volume(sw->resample_buf.buffer, samples, &sw->vol);
 }
 }
 
@@ -722,7 +722,7 @@ static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, 
size_t size)
 osamp = blck;
 st_rate_flow_mix (
 sw->rate,
-sw->buf + pos,
+sw->resample_buf.buffer + pos,
 sw->hw->mix_buf.buffer + wpos,
 &isamp,
 &osamp
@@ -1059,7 +1059,8 @@ static void audio_capture_mix_and_clear(HWVoiceOut *hw, 
size_t rpos,
 size_t bytes = to_write * hw->info.bytes_per_frame;
 size_t written;
 
-sw->buf = hw->mix_buf.buffer + rpos2;
+sw->resample_buf.buffer = hw->mix_buf.buffer + rpos2;
+sw->resample_buf.size = to_write;
 written = audio_pcm_sw_write (sw, NULL, bytes);
 if (written - bytes) {
 dolog("Could not mix %zu bytes into a capture "
diff --git a/audio/audio_int.h b/audio/audio_int.h
index 900b0a6255..f4ec5dcf11 100644
--- a/audio/audio_int.h
+++ b/audio/audio_int.h
@@ -109,7 +109,7 @@ struct SWVoiceOut {
 struct audio_pcm_info info;
 t_sample *conv;
 int64_t ratio;
-struct st_sample *buf;
+STSampleBuffer resample_buf;
 void *rate;
 size_t total_hw_samples_mixed;
 int active;
@@ -129,7 +129,7 @@ struct SWVoiceIn {
 int64_t ratio;
 void *rate;
 size_t total_hw_samples_acquired;
-struct st_sample *buf;
+STSampleBuffer resample_buf;
 f_sample *clip;
 HWVoiceIn *hw;
 char *name;
diff --git a/audio/audio_template.h b/audio/audio_template.h
index 9283f00e9e..07c14e7821 100644
--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@@ -95,13 +95,13 @@ static void glue(audio_pcm_hw_alloc_resources_, TYPE)(HW 
*hw)
 
 static void glue (audio_pcm_sw_free_resources_, TYPE) (SW *sw)
 {
-g_free (sw->buf);
+g_free(sw->resample_buf.buffer);
+sw->resample_buf.buffer = NULL;
+sw->resample_buf.size = 0;
 
 if (sw->rate) {
 st_rate_stop (sw->rate);
 }
-
-sw->buf = NULL;
 sw->rate = NULL;
 }
 
@@ -132,7 +132,9 @@ static int glue (audio_pcm_sw_alloc_resources_, TYPE) (SW 
*sw)
 return -1;
 }
 
-sw->buf = g_new0(st_sample, samples);
+sw->resample_buf.buffer = g_new0(st_sample, samples);
+sw->resample_buf.size = samples;
+sw->resample_buf.pos = 0;
 
 #ifdef DAC
 sw->rate = st_rate_start (sw->info.freq, sw->hw->info.freq);
-- 
2.35.3

[PATCH 12/17] audio: rename variables in audio_pcm_sw_read()

2023-01-15 Thread Volker Rümelin

From: Volker Rümelin 

The audio_pcm_sw_read() function uses a few very unspecific
variable names. Rename them for better readability.

ret => total_out
total => total_in
size => buf_len
samples => frames_out_max

Signed-off-by: Volker Rümelin 
---
 audio/audio.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index 83bac97fa4..b660569928 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -574,10 +574,10 @@ static void audio_pcm_sw_resample_in(SWVoiceIn *sw,
 }
 }
 
-static size_t audio_pcm_sw_read(SWVoiceIn *sw, void *buf, size_t size)
+static size_t audio_pcm_sw_read(SWVoiceIn *sw, void *buf, size_t buf_len)
 {
 HWVoiceIn *hw = sw->hw;
-size_t samples, live, ret, swlim, total;
+size_t live, frames_out_max, swlim, total_in, total_out;
 
 live = hw->total_samples_captured - sw->total_hw_samples_acquired;
 if (!live) {
@@ -588,20 +588,20 @@ static size_t audio_pcm_sw_read(SWVoiceIn *sw, void *buf, 
size_t size)
 return 0;
 }
 
-samples = size / sw->info.bytes_per_frame;
+frames_out_max = buf_len / sw->info.bytes_per_frame;
 
 swlim = (live * sw->ratio) >> 32;
-swlim = MIN (swlim, samples);
+swlim = MIN(swlim, frames_out_max);
 
-audio_pcm_sw_resample_in(sw, live, swlim, &total, &ret);
+audio_pcm_sw_resample_in(sw, live, swlim, &total_in, &total_out);
 
 if (!hw->pcm_ops->volume_in) {
-mixeng_volume(sw->resample_buf.buffer, ret, &sw->vol);
+mixeng_volume(sw->resample_buf.buffer, total_out, &sw->vol);
 }
+sw->clip(buf, sw->resample_buf.buffer, total_out);
 
-sw->clip(buf, sw->resample_buf.buffer, ret);
-sw->total_hw_samples_acquired += total;
-return ret * sw->info.bytes_per_frame;
+sw->total_hw_samples_acquired += total_in;
+return total_out * sw->info.bytes_per_frame;
 }
 
 /*
-- 
2.35.3

[PATCH 17/17] audio: remove sw->ratio

2023-01-15 Thread Volker Rümelin

From: Volker Rümelin 

Simplify the resample buffer size calculation.

For audio playback we have
sw->ratio = ((int64_t)sw->hw->info.freq << 32) / sw->info.freq;
samples = ((int64_t)sw->HWBUF.size << 32) / sw->ratio;

This can be simplified to
samples = muldiv64(sw->HWBUF.size, sw->info.freq, sw->hw->info.freq);

For audio recording we have
sw->ratio = ((int64_t)sw->info.freq << 32) / sw->hw->info.freq;
samples = (int64_t)sw->HWBUF.size * sw->ratio >> 32;

This can be simplified to
samples = muldiv64(sw->HWBUF.size, sw->info.freq, sw->hw->info.freq);

With hw = sw->hw this becomes in both cases
samples = muldiv64(HWBUF.size, sw->info.freq, hw->info.freq);

Now that sw->ratio is no longer needed, remove sw->ratio.

Signed-off-by: Volker Rümelin 
---
 audio/audio.c  | 1 -
 audio/audio_int.h  | 2 --
 audio/audio_template.h | 9 +
 3 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index b846b89a27..b68ed4eb68 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -476,7 +476,6 @@ static int audio_attach_capture (HWVoiceOut *hw)
 sw->info = hw->info;
 sw->empty = 1;
 sw->active = hw->enabled;
-sw->ratio = ((int64_t) hw_cap->info.freq << 32) / sw->info.freq;
 sw->vol = nominal_volume;
 sw->rate = st_rate_start (sw->info.freq, hw_cap->info.freq);
 QLIST_INSERT_HEAD (&hw_cap->sw_head, sw, entries);
diff --git a/audio/audio_int.h b/audio/audio_int.h
index f4ec5dcf11..3cd3539bd4 100644
--- a/audio/audio_int.h
+++ b/audio/audio_int.h
@@ -108,7 +108,6 @@ struct SWVoiceOut {
 AudioState *s;
 struct audio_pcm_info info;
 t_sample *conv;
-int64_t ratio;
 STSampleBuffer resample_buf;
 void *rate;
 size_t total_hw_samples_mixed;
@@ -126,7 +125,6 @@ struct SWVoiceIn {
 AudioState *s;
 int active;
 struct audio_pcm_info info;
-int64_t ratio;
 void *rate;
 size_t total_hw_samples_acquired;
 STSampleBuffer resample_buf;
diff --git a/audio/audio_template.h b/audio/audio_template.h
index 0cdf57760e..c053792da3 100644
--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@@ -114,11 +114,7 @@ static int glue (audio_pcm_sw_alloc_resources_, TYPE) (SW 
*sw)
 return 0;
 }
 
-#ifdef DAC
-samples = ((int64_t)sw->HWBUF.size << 32) / sw->ratio;
-#else
-samples = (int64_t)sw->HWBUF.size * sw->ratio >> 32;
-#endif
+samples = muldiv64(HWBUF.size, sw->info.freq, hw->info.freq);
 if (samples == 0) {
 size_t f_fe_min;
 
@@ -159,11 +155,8 @@ static int glue (audio_pcm_sw_init_, TYPE) (
 sw->hw = hw;
 sw->active = 0;
 #ifdef DAC
-sw->ratio = ((int64_t) sw->hw->info.freq << 32) / sw->info.freq;
 sw->total_hw_samples_mixed = 0;
 sw->empty = 1;
-#else
-sw->ratio = ((int64_t) sw->info.freq << 32) / sw->hw->info.freq;
 #endif
 
 if (sw->info.is_float) {
-- 
2.35.3

[PATCH 13/17] audio/mixeng: calculate number of output frames

2023-01-15 Thread Volker Rümelin

From: Volker Rümelin 

Calculate the exact number of audio output frames the resampling
code can generate from a given number of audio input frames.
When upsampling, this function returns the maximum number of
output frames.

This function will later replace the audio_frontend_frames_in()
function, which calculates the average number of output frames
rounded down to the nearest integer.

Signed-off-by: Volker Rümelin 
---
 audio/mixeng.c | 37 +
 audio/mixeng.h |  1 +
 2 files changed, 38 insertions(+)

diff --git a/audio/mixeng.c b/audio/mixeng.c
index 6bb3d54f77..92a3a1ac58 100644
--- a/audio/mixeng.c
+++ b/audio/mixeng.c
@@ -440,6 +440,43 @@ void st_rate_stop (void *opaque)
 g_free (opaque);
 }
 
+/**
+ * st_rate_frames_out() - returns the number of frames the resampling code
+ * generates from frames_in frames
+ *
+ * @opaque: pointer to struct rate
+ * @frames_in: number of frames
+ */
+uint32_t st_rate_frames_out(void *opaque, uint32_t frames_in)
+{
+struct rate *rate = opaque;
+uint64_t opos_end, opos_delta;
+uint32_t ipos_end;
+uint32_t frames_out;
+
+if (rate->opos_inc == 1ULL << 32) {
+return frames_in;
+}
+
+/* no output frame without at least one input frame */
+if (!frames_in) {
+return 0;
+}
+
+/* last frame read was at rate->ipos - 1 */
+ipos_end = rate->ipos - 1 + frames_in;
+opos_end = (uint64_t)ipos_end << 32;
+
+/* last frame written was at rate->opos - rate->opos_inc */
+if (opos_end + rate->opos_inc <= rate->opos) {
+return 0;
+}
+opos_delta = opos_end - rate->opos + rate->opos_inc;
+frames_out = opos_delta / rate->opos_inc;
+
+return opos_delta % rate->opos_inc ? frames_out : frames_out - 1;
+}
+
 /**
  * st_rate_frames_in() - returns the number of frames needed to
  * get frames_out frames after resampling
diff --git a/audio/mixeng.h b/audio/mixeng.h
index 64c1e231cc..f9de7cffeb 100644
--- a/audio/mixeng.h
+++ b/audio/mixeng.h
@@ -52,6 +52,7 @@ void st_rate_flow(void *opaque, st_sample *ibuf, st_sample 
*obuf,
 void st_rate_flow_mix(void *opaque, st_sample *ibuf, st_sample *obuf,
   size_t *isamp, size_t *osamp);
 void st_rate_stop (void *opaque);
+uint32_t st_rate_frames_out(void *opaque, uint32_t frames_in);
 uint32_t st_rate_frames_in(void *opaque, uint32_t frames_out);
 void mixeng_clear (struct st_sample *buf, int len);
 void mixeng_volume (struct st_sample *buf, int len, struct mixeng_volume *vol);
-- 
2.35.3

[PATCH 06/17] audio: rename variables in audio_pcm_sw_write()

2023-01-15 Thread Volker Rümelin

From: Volker Rümelin 

The audio_pcm_sw_write() function uses a lot of very unspecific
variable names. Rename them for better readability.

ret => total_in
total => total_out
size => buf_len
hwsamples => hw->mix_buf.size
samples => frames_in_max

Signed-off-by: Volker Rümelin 
---
 audio/audio.c | 45 ++---
 1 file changed, 22 insertions(+), 23 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index 9d6ffa500a..a8571100ff 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -704,56 +704,55 @@ static void audio_pcm_sw_resample_out(SWVoiceOut *sw,
 }
 }
 
-static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, size_t size)
+static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, size_t buf_len)
 {
-size_t hwsamples, samples, live, dead;
-size_t hw_free;
-size_t ret, total;
-
-hwsamples = sw->hw->mix_buf.size;
+HWVoiceOut *hw = sw->hw;
+size_t live, dead, hw_free;
+size_t frames_in_max, total_in, total_out;
 
 live = sw->total_hw_samples_mixed;
-if (audio_bug(__func__, live > hwsamples)) {
-dolog("live=%zu hw->mix_buf.size=%zu\n", live, hwsamples);
+if (audio_bug(__func__, live > hw->mix_buf.size)) {
+dolog("live=%zu hw->mix_buf.size=%zu\n", live, hw->mix_buf.size);
 return 0;
 }
 
-if (live == hwsamples) {
+if (live == hw->mix_buf.size) {
 #ifdef DEBUG_OUT
 dolog ("%s is full %zu\n", sw->name, live);
 #endif
 return 0;
 }
 
-dead = hwsamples - live;
-hw_free = audio_pcm_hw_get_free(sw->hw);
+dead = hw->mix_buf.size - live;
+hw_free = audio_pcm_hw_get_free(hw);
 hw_free = hw_free > live ? hw_free - live : 0;
-samples = ((int64_t)MIN(dead, hw_free) << 32) / sw->ratio;
-samples = MIN(samples, size / sw->info.bytes_per_frame);
-if (samples) {
-sw->conv(sw->resample_buf.buffer, buf, samples);
+frames_in_max = ((int64_t)MIN(dead, hw_free) << 32) / sw->ratio;
+frames_in_max = MIN(frames_in_max, buf_len / sw->info.bytes_per_frame);
+if (frames_in_max) {
+sw->conv(sw->resample_buf.buffer, buf, frames_in_max);
 
 if (!sw->hw->pcm_ops->volume_out) {
-mixeng_volume(sw->resample_buf.buffer, samples, &sw->vol);
+mixeng_volume(sw->resample_buf.buffer, frames_in_max, &sw->vol);
 }
 }
 
-audio_pcm_sw_resample_out(sw, samples, MIN(dead, hw_free), &ret, &total);
+audio_pcm_sw_resample_out(sw, frames_in_max, MIN(dead, hw_free),
+  &total_in, &total_out);
 
-sw->total_hw_samples_mixed += total;
+sw->total_hw_samples_mixed += total_out;
 sw->empty = sw->total_hw_samples_mixed == 0;
 
 #ifdef DEBUG_OUT
 dolog (
-"%s: write size %zu ret %zu total sw %zu\n",
-SW_NAME (sw),
-size / sw->info.bytes_per_frame,
-ret,
+"%s: write size %zu written %zu total mixed %zu\n",
+SW_NAME(sw),
+buf_len / sw->info.bytes_per_frame,
+total_in,
 sw->total_hw_samples_mixed
 );
 #endif
 
-return ret * sw->info.bytes_per_frame;
+return total_in * sw->info.bytes_per_frame;
 }
 
 #ifdef DEBUG_AUDIO
-- 
2.35.3

[PATCH 10/17] audio: wire up st_rate_frames_in()

2023-01-15 Thread Volker Rümelin

From: Volker Rümelin 

Wire up the st_rate_frames_in() function and replace
audio_frontend_frames_out() to make audio packet length
calculation exact. When upsampling, it's still possible that
the audio frontends can't write the last audio frame. This will
be fixed later.

Signed-off-by: Volker Rümelin 
---
 audio/audio.c | 43 ++++++++++++++++++-------------------------
 1 file changed, 18 insertions(+), 25 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index 9c0855fb13..3d3b5e5b91 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -699,8 +699,8 @@ static void audio_pcm_sw_resample_out(SWVoiceOut *sw,
 static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, size_t buf_len)
 {
 HWVoiceOut *hw = sw->hw;
-size_t live, dead, hw_free;
-size_t frames_in_max, total_in, total_out;
+size_t live, dead, hw_free, sw_max, fe_max;
+size_t frames_in_max, frames_out_max, total_in, total_out;
 
 live = sw->total_hw_samples_mixed;
 if (audio_bug(__func__, live > hw->mix_buf.size)) {
@@ -718,17 +718,21 @@ static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, size_t buf_len)
 dead = hw->mix_buf.size - live;
 hw_free = audio_pcm_hw_get_free(hw);
 hw_free = hw_free > live ? hw_free - live : 0;
-frames_in_max = ((int64_t)MIN(dead, hw_free) << 32) / sw->ratio;
-frames_in_max = MIN(frames_in_max, buf_len / sw->info.bytes_per_frame);
-if (frames_in_max) {
-sw->conv(sw->resample_buf.buffer, buf, frames_in_max);
+frames_out_max = MIN(dead, hw_free);
+sw_max = st_rate_frames_in(sw->rate, frames_out_max);
+fe_max = MIN(buf_len / sw->info.bytes_per_frame, sw->resample_buf.size);
+frames_in_max = MIN(sw_max, fe_max);
 
-if (!sw->hw->pcm_ops->volume_out) {
-mixeng_volume(sw->resample_buf.buffer, frames_in_max, &sw->vol);
-}
+if (!frames_in_max) {
+return 0;
 }
 
-audio_pcm_sw_resample_out(sw, frames_in_max, MIN(dead, hw_free),
+sw->conv(sw->resample_buf.buffer, buf, frames_in_max);
+if (!sw->hw->pcm_ops->volume_out) {
+mixeng_volume(sw->resample_buf.buffer, frames_in_max, &sw->vol);
+}
+
+audio_pcm_sw_resample_out(sw, frames_in_max, frames_out_max,
   &total_in, &total_out);
 
 sw->total_hw_samples_mixed += total_out;
@@ -998,18 +1002,6 @@ static size_t audio_get_avail (SWVoiceIn *sw)
 return live;
 }
 
-/**
- * audio_frontend_frames_out() - returns the number of frames needed to
- * get frames_out frames after resampling
- *
- * @sw: audio playback frontend
- * @frames_out: number of frames
- */
-static size_t audio_frontend_frames_out(SWVoiceOut *sw, size_t frames_out)
-{
-return ((int64_t)frames_out << 32) / sw->ratio;
-}
-
 static size_t audio_get_free(SWVoiceOut *sw)
 {
 size_t live, dead;
@@ -1029,8 +1021,8 @@ static size_t audio_get_free(SWVoiceOut *sw)
 dead = sw->hw->mix_buf.size - live;
 
 #ifdef DEBUG_OUT
-dolog("%s: get_free live %zu dead %zu frontend frames %zu\n",
-  SW_NAME(sw), live, dead, audio_frontend_frames_out(sw, dead));
+dolog("%s: get_free live %zu dead %zu frontend frames %u\n",
+  SW_NAME(sw), live, dead, st_rate_frames_in(sw->rate, dead));
 #endif
 
 return dead;
@@ -1159,12 +1151,13 @@ static void audio_run_out (AudioState *s)
 size_t free;
 
 if (hw_free > sw->total_hw_samples_mixed) {
-free = audio_frontend_frames_out(sw,
+free = st_rate_frames_in(sw->rate,
 MIN(sw_free, hw_free - sw->total_hw_samples_mixed));
 } else {
 free = 0;
 }
 if (free > 0) {
+free = MIN(free, sw->resample_buf.size);
 sw->callback.fn(sw->callback.opaque,
 free * sw->info.bytes_per_frame);
 }
-- 
2.35.3

[PATCH 08/17] audio: remove unused noop_conv() function

2023-01-15 Thread Volker Rümelin

From: Volker Rümelin 

The function audio_capture_mix_and_clear() no longer uses
audio_pcm_sw_write() to resample audio frames from one internal
buffer to another. For this reason, the noop_conv() function is
now unused. Remove it.

Signed-off-by: Volker Rümelin 
---
 audio/audio.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index 0cfd56850f..9c0855fb13 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -379,13 +379,6 @@ void audio_pcm_info_clear_buf (struct audio_pcm_info *info, void *buf, int len)
 /*
  * Capture
  */
-static void noop_conv (struct st_sample *dst, const void *src, int samples)
-{
-(void) src;
-(void) dst;
-(void) samples;
-}
-
 static CaptureVoiceOut *audio_pcm_capture_find_specific(AudioState *s,
 struct audsettings *as)
 {
@@ -483,7 +476,6 @@ static int audio_attach_capture (HWVoiceOut *hw)
 sw->info = hw->info;
 sw->empty = 1;
 sw->active = hw->enabled;
-sw->conv = noop_conv;
 sw->ratio = ((int64_t) hw_cap->info.freq << 32) / sw->info.freq;
 sw->vol = nominal_volume;
 sw->rate = st_rate_start (sw->info.freq, hw_cap->info.freq);
-- 
2.35.3

[PATCH 11/17] audio: replace the resampling loop in audio_pcm_sw_read()

2023-01-15 Thread Volker Rümelin

From: Volker Rümelin 

Replace the resampling loop in audio_pcm_sw_read() with the new
function audio_pcm_sw_resample_in(). Unlike the old resample
loop the new function will try to consume input frames even if
the output buffer is full. This is necessary when downsampling
to avoid reading less audio frames than calculated in advance.
The loop was unrolled to avoid complicated loop control conditions
in this case.

Signed-off-by: Volker Rümelin 
---
 audio/audio.c | 59 +++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 35 insertions(+), 24 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index 3d3b5e5b91..83bac97fa4 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -541,11 +541,43 @@ static size_t audio_pcm_hw_conv_in(HWVoiceIn *hw, void *pcm_buf, size_t samples)
 /*
  * Soft voice (capture)
  */
+static void audio_pcm_sw_resample_in(SWVoiceIn *sw,
+size_t frames_in_max, size_t frames_out_max,
+size_t *total_in, size_t *total_out)
+{
+HWVoiceIn *hw = sw->hw;
+struct st_sample *src, *dst;
+size_t live, rpos, frames_in, frames_out;
+
+live = hw->total_samples_captured - sw->total_hw_samples_acquired;
+rpos = audio_ring_posb(hw->conv_buf.pos, live, hw->conv_buf.size);
+
+/* resample conv_buf from rpos to end of buffer */
+src = hw->conv_buf.buffer + rpos;
+frames_in = MIN(live, hw->conv_buf.size - rpos);
+dst = sw->resample_buf.buffer;
+frames_out = frames_out_max;
+st_rate_flow(sw->rate, src, dst, &frames_in, &frames_out);
+rpos += frames_in;
+*total_in = frames_in;
+*total_out = frames_out;
+
+/* resample conv_buf from start of buffer if there are input frames left */
+if (live - frames_in && rpos == hw->conv_buf.size) {
+src = hw->conv_buf.buffer;
+frames_in = live - frames_in;
+dst += frames_out;
+frames_out = frames_out_max - frames_out;
+st_rate_flow(sw->rate, src, dst, &frames_in, &frames_out);
+*total_in += frames_in;
+*total_out += frames_out;
+}
+}
+
 static size_t audio_pcm_sw_read(SWVoiceIn *sw, void *buf, size_t size)
 {
 HWVoiceIn *hw = sw->hw;
-size_t samples, live, ret = 0, swlim, isamp, osamp, rpos, total = 0;
-struct st_sample *src, *dst = sw->resample_buf.buffer;
+size_t samples, live, ret, swlim, total;
 
 live = hw->total_samples_captured - sw->total_hw_samples_acquired;
 if (!live) {
@@ -556,33 +588,12 @@ static size_t audio_pcm_sw_read(SWVoiceIn *sw, void *buf, size_t size)
 return 0;
 }
 
-rpos = audio_ring_posb(hw->conv_buf.pos, live, hw->conv_buf.size);
-
 samples = size / sw->info.bytes_per_frame;
 
 swlim = (live * sw->ratio) >> 32;
 swlim = MIN (swlim, samples);
 
-while (swlim) {
-src = hw->conv_buf.buffer + rpos;
-if (hw->conv_buf.pos > rpos) {
-isamp = hw->conv_buf.pos - rpos;
-} else {
-isamp = hw->conv_buf.size - rpos;
-}
-
-if (!isamp) {
-break;
-}
-osamp = swlim;
-
-st_rate_flow (sw->rate, src, dst, &isamp, &osamp);
-swlim -= osamp;
-rpos = (rpos + isamp) % hw->conv_buf.size;
-dst += osamp;
-ret += osamp;
-total += isamp;
-}
+audio_pcm_sw_resample_in(sw, live, swlim, &total, &ret);
 
 if (!hw->pcm_ops->volume_in) {
 mixeng_volume(sw->resample_buf.buffer, ret, &sw->vol);
-- 
2.35.3

[PATCH 05/17] audio: remove sw == NULL check

2023-01-15 Thread Volker Rümelin

From: Volker Rümelin 

All call sites of audio_pcm_sw_write() guarantee that sw is not
NULL. Remove the unnecessary NULL check.

Signed-off-by: Volker Rümelin 
---
 audio/audio.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index b0a270ba85..9d6ffa500a 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -710,10 +710,6 @@ static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, size_t size)
 size_t hw_free;
 size_t ret, total;
 
-if (!sw) {
-return size;
-}
-
 hwsamples = sw->hw->mix_buf.size;
 
 live = sw->total_hw_samples_mixed;
-- 
2.35.3

[PATCH 14/17] audio: wire up st_rate_frames_out()

2023-01-15 Thread Volker Rümelin

From: Volker Rümelin 

Wire up the st_rate_frames_out() function and replace
audio_frontend_frames_in() to make audio packet length
calculation exact.

Signed-off-by: Volker Rümelin 
---
 audio/audio.c | 29 ++++++++---------------------
 1 file changed, 8 insertions(+), 21 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index b660569928..ecd5d31260 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -577,7 +577,7 @@ static void audio_pcm_sw_resample_in(SWVoiceIn *sw,
 static size_t audio_pcm_sw_read(SWVoiceIn *sw, void *buf, size_t buf_len)
 {
 HWVoiceIn *hw = sw->hw;
-size_t live, frames_out_max, swlim, total_in, total_out;
+size_t live, frames_out_max, total_in, total_out;
 
 live = hw->total_samples_captured - sw->total_hw_samples_acquired;
 if (!live) {
@@ -588,12 +588,10 @@ static size_t audio_pcm_sw_read(SWVoiceIn *sw, void *buf, size_t buf_len)
 return 0;
 }
 
-frames_out_max = buf_len / sw->info.bytes_per_frame;
+frames_out_max = MIN(buf_len / sw->info.bytes_per_frame,
+ sw->resample_buf.size);
 
-swlim = (live * sw->ratio) >> 32;
-swlim = MIN(swlim, frames_out_max);
-
-audio_pcm_sw_resample_in(sw, live, swlim, &total_in, &total_out);
+audio_pcm_sw_resample_in(sw, live, frames_out_max, &total_in, &total_out);
 
 if (!hw->pcm_ops->volume_in) {
 mixeng_volume(sw->resample_buf.buffer, total_out, &sw->vol);
@@ -977,18 +975,6 @@ void AUD_set_active_in (SWVoiceIn *sw, int on)
 }
 }
 
-/**
- * audio_frontend_frames_in() - returns the number of frames the resampling
- * code generates from frames_in frames
- *
- * @sw: audio recording frontend
- * @frames_in: number of frames
- */
-static size_t audio_frontend_frames_in(SWVoiceIn *sw, size_t frames_in)
-{
-return (int64_t)frames_in * sw->ratio >> 32;
-}
-
 static size_t audio_get_avail (SWVoiceIn *sw)
 {
 size_t live;
@@ -1005,9 +991,9 @@ static size_t audio_get_avail (SWVoiceIn *sw)
 }
 
 ldebug (
-"%s: get_avail live %zu frontend frames %zu\n",
+"%s: get_avail live %zu frontend frames %u\n",
 SW_NAME (sw),
-live, audio_frontend_frames_in(sw, live)
+live, st_rate_frames_out(sw->rate, live)
 );
 
 return live;
@@ -1312,8 +1298,9 @@ static void audio_run_in (AudioState *s)
 size_t sw_avail = audio_get_avail(sw);
 size_t avail;
 
-avail = audio_frontend_frames_in(sw, sw_avail);
+avail = st_rate_frames_out(sw->rate, sw_avail);
 if (avail > 0) {
+avail = MIN(avail, sw->resample_buf.size);
 sw->callback.fn(sw->callback.opaque,
 avail * sw->info.bytes_per_frame);
 }
-- 
2.35.3

[PATCH 16/17] audio/audio_template: substitute sw->hw with hw

2023-01-15 Thread Volker Rümelin

From: Volker Rümelin 

Substitute sw->hw with hw in the audio_pcm_sw_alloc_resources_*
functions.

Signed-off-by: Volker Rümelin 
---
 audio/audio_template.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/audio/audio_template.h b/audio/audio_template.h
index a9a550a3b7..0cdf57760e 100644
--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@@ -107,6 +107,7 @@ static void glue (audio_pcm_sw_free_resources_, TYPE) (SW *sw)
 
 static int glue (audio_pcm_sw_alloc_resources_, TYPE) (SW *sw)
 {
+HW *hw = sw->hw;
 int samples;
 
 if (!glue(audio_get_pdo_, TYPE)(sw->s->dev)->mixing_engine) {
@@ -119,7 +120,6 @@ static int glue (audio_pcm_sw_alloc_resources_, TYPE) (SW *sw)
 samples = (int64_t)sw->HWBUF.size * sw->ratio >> 32;
 #endif
 if (samples == 0) {
-HW *hw = sw->hw;
 size_t f_fe_min;
 
 /* f_fe_min = ceil(1 [frames] * f_be [Hz] / size_be [frames]) */
@@ -138,9 +138,9 @@ static int glue (audio_pcm_sw_alloc_resources_, TYPE) (SW *sw)
 sw->resample_buf.pos = 0;
 
 #ifdef DAC
-sw->rate = st_rate_start (sw->info.freq, sw->hw->info.freq);
+sw->rate = st_rate_start(sw->info.freq, hw->info.freq);
 #else
-sw->rate = st_rate_start (sw->hw->info.freq, sw->info.freq);
+sw->rate = st_rate_start(hw->info.freq, sw->info.freq);
 #endif
 
 return 0;
-- 
2.35.3

1 2 >

1 - 100 of 111 matches

Mail list logo