Re: [RESEND PATCH] hw/mem/nvdimm: fix error message for 'unarmed' flag

2022-10-19 Thread Pankaj Gupta
> In the ACPI specification [1], the 'unarmed' bit is set when a device
> cannot accept a persistent write. This means that when a memdev is
> read-only, the 'unarmed' flag must be turned on. The logic is correct,
> just changing the error message.
>
> [1] ACPI NFIT NVDIMM Region Mapping Structure "NVDIMM State Flags" Bit 3
>
> Signed-off-by: Julia Suvorova 
> Reviewed-by: Stefan Hajnoczi 
> ---
>  hw/mem/nvdimm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
> index 7c7d81..bfb76818c1 100644
> --- a/hw/mem/nvdimm.c
> +++ b/hw/mem/nvdimm.c
> @@ -149,7 +149,7 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice 
> *nvdimm, Error **errp)
>  if (!nvdimm->unarmed && memory_region_is_rom(mr)) {
>  HostMemoryBackend *hostmem = dimm->hostmem;
>
> -error_setg(errp, "'unarmed' property must be off since memdev %s "
> +error_setg(errp, "'unarmed' property must be on since memdev %s "
> "is read-only",
> object_get_canonical_path_component(OBJECT(hostmem)));
>  return;

With the suggested minor change.

Reviewed-by: Pankaj Gupta 



Re: [PATCH v5 4/6] hw/arm/virt: Introduce virt_get_high_memmap_enabled() helper

2022-10-19 Thread Eric Auger



On 10/12/22 01:18, Gavin Shan wrote:
> This introduces the virt_get_high_memmap_enabled() helper, which returns
> a pointer to vms->highmem_{redists, ecam, mmio}. The pointer will
> be used in the subsequent patches.
>
> No functional change intended.
>
> Signed-off-by: Gavin Shan 
> Tested-by: Zhenyu Zhang 
Reviewed-by: Eric Auger 


Eric
> ---
>  hw/arm/virt.c | 32 +++-
>  1 file changed, 19 insertions(+), 13 deletions(-)
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 0bf3cb7057..ee98a8a3b6 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -1689,14 +1689,31 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState 
> *vms, int idx)
>  return arm_cpu_mp_affinity(idx, clustersz);
>  }
>  
> +static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms,
> + int index)
> +{
> +bool *enabled_array[] = {
> +&vms->highmem_redists,
> +&vms->highmem_ecam,
> +&vms->highmem_mmio,
> +};
> +
> +assert(ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST ==
> +   ARRAY_SIZE(enabled_array));
> +assert(index - VIRT_LOWMEMMAP_LAST < ARRAY_SIZE(enabled_array));
> +
> +return enabled_array[index - VIRT_LOWMEMMAP_LAST];
> +}
> +
>  static void virt_set_high_memmap(VirtMachineState *vms,
>   hwaddr base, int pa_bits)
>  {
>  hwaddr region_base, region_size;
> -bool fits;
> +bool *region_enabled, fits;
>  int i;
>  
>  for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
> +region_enabled = virt_get_high_memmap_enabled(vms, i);
>  region_base = ROUND_UP(base, extended_memmap[i].size);
>  region_size = extended_memmap[i].size;
>  
> @@ -1714,18 +1731,7 @@ static void virt_set_high_memmap(VirtMachineState *vms,
>  vms->highest_gpa = region_base + region_size - 1;
>  }
>  
> -switch (i) {
> -case VIRT_HIGH_GIC_REDIST2:
> -vms->highmem_redists &= fits;
> -break;
> -case VIRT_HIGH_PCIE_ECAM:
> -vms->highmem_ecam &= fits;
> -break;
> -case VIRT_HIGH_PCIE_MMIO:
> -vms->highmem_mmio &= fits;
> -break;
> -}
> -
> +*region_enabled &= fits;
>  base = region_base + region_size;
>  }
>  }




Re: [RFC PATCH v2 7/8] vhost_net: return VIRTIO_NET_S_ANNOUNCE if device model has it set

2022-10-19 Thread Jason Wang
On Wed, Oct 19, 2022 at 8:52 PM Eugenio Pérez  wrote:
>
> Temporary, as this affects other vhost backends and we must check the
> status feature first.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  hw/net/vhost_net.c | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 5660606c1d..300f370e2a 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -118,6 +118,7 @@ int vhost_net_get_config(struct vhost_net *net,  uint8_t 
> *config,
>   uint32_t config_len)
>  {
>  VirtIODevice *vdev;
> +VirtIONet *n;
>  int r = vhost_dev_get_config(&net->dev, config, config_len, NULL);
>
>  if (unlikely(r != 0)) {
> @@ -142,6 +143,13 @@ int vhost_net_get_config(struct vhost_net *net,  uint8_t 
> *config,
>  ((struct virtio_net_config *)config)->status |= VIRTIO_NET_S_LINK_UP;
>  }
>
> +if (!(net->dev.acked_features & BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE))) {
> +return 0;
> +}
> +
> +n = VIRTIO_NET(vdev);
> +((struct virtio_net_config *)config)->status |=
> +   (n->status & 
> VIRTIO_NET_S_ANNOUNCE);

Similar to the previous patch, it would be better to move this to virtio-net.c.

Thanks

>  return 0;
>  }
>  int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
> --
> 2.31.1
>




Re: [RFC PATCH v2 6/8] vdpa: handle VIRTIO_NET_CTRL_ANNOUNCE in vhost_vdpa_net_handle_ctrl_avail

2022-10-19 Thread Jason Wang
On Wed, Oct 19, 2022 at 8:52 PM Eugenio Pérez  wrote:
>
> Since this capability is emulated by qemu, the shadowed CVQ cannot forward
> it to the device.

I wonder what happens for a device that has GUEST_ANNOUNCE support on its own?

> Process that command entirely within qemu.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  net/vhost-vdpa.c | 15 ---
>  1 file changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 3374c21b4d..5fda405a66 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -488,9 +488,18 @@ static int 
> vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
>  out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
>   s->cvq_cmd_out_buffer,
>   vhost_vdpa_net_cvq_cmd_len());
> -dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
> -if (unlikely(dev_written < 0)) {
> -goto out;
> +if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) {

Interesting, I thought we could do better by keeping this kind of code
out of the vhost-vDPA specific code; everything should be set at the
virtio-net.c level.

Thanks

> +/*
> + * Guest announce capability is emulated by qemu, so don't forward to
> + * the device.
> + */
> +dev_written = sizeof(status);
> +*s->status = VIRTIO_NET_OK;
> +} else {
> +dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
> +if (unlikely(dev_written < 0)) {
> +goto out;
> +}
>  }
>
>  if (unlikely(dev_written < sizeof(status))) {
> --
> 2.31.1
>




Re: [RFC PATCH v2 5/8] vdpa: Remove shadow CVQ command check

2022-10-19 Thread Jason Wang
On Wed, Oct 19, 2022 at 8:52 PM Eugenio Pérez  wrote:
>
> The guest will see undefined behavior if it issues non-negotiated
> commands, but that is somewhat expected.
>
> Simplify the code by deleting this check.
>
> Signed-off-by: Eugenio Pérez 

Acked-by: Jason Wang 

> ---
>  net/vhost-vdpa.c | 48 
>  1 file changed, 48 deletions(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index fca21d5b79..3374c21b4d 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -461,48 +461,6 @@ static NetClientInfo net_vhost_vdpa_cvq_info = {
>  .check_peer_type = vhost_vdpa_check_peer_type,
>  };
>
> -/**
> - * Do not forward commands not supported by SVQ. Otherwise, the device could
> - * accept it and qemu would not know how to update the device model.
> - */
> -static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len)
> -{
> -struct virtio_net_ctrl_hdr ctrl;
> -
> -if (unlikely(len < sizeof(ctrl))) {
> -qemu_log_mask(LOG_GUEST_ERROR,
> -  "%s: invalid legnth of out buffer %zu\n", __func__, 
> len);
> -return false;
> -}
> -
> -memcpy(&ctrl, out_buf, sizeof(ctrl));
> -switch (ctrl.class) {
> -case VIRTIO_NET_CTRL_MAC:
> -switch (ctrl.cmd) {
> -case VIRTIO_NET_CTRL_MAC_ADDR_SET:
> -return true;
> -default:
> -qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
> -  __func__, ctrl.cmd);
> -};
> -break;
> -case VIRTIO_NET_CTRL_MQ:
> -switch (ctrl.cmd) {
> -case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
> -return true;
> -default:
> -qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mq cmd %u\n",
> -  __func__, ctrl.cmd);
> -};
> -break;
> -default:
> -qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
> -  __func__, ctrl.class);
> -};
> -
> -return false;
> -}
> -
>  /**
>   * Validate and copy control virtqueue commands.
>   *
> @@ -526,16 +484,10 @@ static int 
> vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
>  .iov_len = sizeof(status),
>  };
>  ssize_t dev_written = -EINVAL;
> -bool ok;
>
>  out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
>   s->cvq_cmd_out_buffer,
>   vhost_vdpa_net_cvq_cmd_len());
> -ok = vhost_vdpa_net_cvq_validate_cmd(s->cvq_cmd_out_buffer, out.iov_len);
> -if (unlikely(!ok)) {
> -goto out;
> -}
> -
>  dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
>  if (unlikely(dev_written < 0)) {
>  goto out;
> --
> 2.31.1
>




Re: [RFC PATCH v2 3/8] vhost_net: Emulate link state up if backend doesn't expose it

2022-10-19 Thread Jason Wang
On Wed, Oct 19, 2022 at 8:52 PM Eugenio Pérez  wrote:
>
> At this moment this code path is not reached, but vdpa devices can offer
> VIRTIO_NET_F_STATUS unconditionally.

So I guess what you mean is that, for a parent that doesn't support
VIRTIO_NET_F_STATUS, we emulate one to make sure ANNOUNCE works.
This is safe since the spec says the driver will assume the link is
always up without this feature.

> While the standard requires the guest to assume
> the link is always up, qemu will always set the status bit to 1
> in this case.
>
> This is of little use by itself, but VIRTIO_NET_F_STATUS is needed for
> the guest to read the status bit VIRTIO_NET_S_ANNOUNCE, used by the
> VIRTIO_NET_F_GUEST_ANNOUNCE feature. So qemu must emulate the status
> feature in case it needs to emulate the guest announce feature.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  hw/net/vhost_net.c | 27 ++-
>  1 file changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index d28f8b974b..5660606c1d 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -117,7 +117,32 @@ uint64_t vhost_net_get_features(struct vhost_net *net, 
> uint64_t features)
>  int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
>   uint32_t config_len)
>  {
> -return vhost_dev_get_config(&net->dev, config, config_len, NULL);
> +VirtIODevice *vdev;
> +int r = vhost_dev_get_config(&net->dev, config, config_len, NULL);
> +
> +if (unlikely(r != 0)) {
> +return r;
> +}
> +
> +if (config_len < endof(struct virtio_net_config, status)) {
> +return 0;
> +}
> +
> +/*
> + * TODO: Perform this only if vhost_vdpa.
> + */

Cindy adds some mediation codes for vhost-vDPA in
virtio_net_get_config(), so I believe it can be done there?

Thanks

> +vdev = net->dev.vdev;
> +if (!vdev) {
> +/* Device is starting */
> +return 0;
> +}
> +
> +if ((net->dev.acked_features & BIT_ULL(VIRTIO_NET_F_STATUS)) &&
> +!(net->dev.features & BIT_ULL(VIRTIO_NET_F_STATUS))) {
> +((struct virtio_net_config *)config)->status |= VIRTIO_NET_S_LINK_UP;
> +}
> +
> +return 0;
>  }
>  int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
>   uint32_t offset, uint32_t size, uint32_t flags)
> --
> 2.31.1
>




Re: [RFC PATCH v2 0/8] Guest announce feature emulation using Shadow VirtQueue

2022-10-19 Thread Jason Wang
On Wed, Oct 19, 2022 at 8:52 PM Eugenio Pérez  wrote:
>
> A gratuitous ARP is recommended after a live migration to reduce the amount of
> time needed by the network links to become aware of the new location.

A question: I think we need to deal with the case where GUEST_ANNOUNCE
is not negotiated? E.g. sending the gARP by ourselves via vhost-vDPA?

Thanks

> A hypervisor
> may not have knowledge of the guest network configuration, and this is
> especially true with passthrough devices, so it's simpler to ask the guest to
> do it.
>
> However, the device control part of this feature can be totally emulated by
> qemu and the shadow virtqueue, without needing any special feature from the
> actual vdpa device.
>
> VIRTIO_NET_F_STATUS is also needed for the guest to access the status field of
> the virtio net config, where the announcement status bit is set. It is emulated
> as always active in case the backend does not support it.
>
> v2:
> * Add VIRTIO_NET_F_STATUS emulation.
>
> Eugenio Pérez (8):
>   vdpa: Delete duplicated vdpa_feature_bits entry
>   vdpa: Save emulated features list in vhost_vdpa
>   vhost_net: Emulate link state up if backend doesn't expose it
>   vdpa: Expose VIRTIO_NET_F_STATUS unconditionally
>   vdpa: Remove shadow CVQ command check
>   vdpa: handle VIRTIO_NET_CTRL_ANNOUNCE in
> vhost_vdpa_net_handle_ctrl_avail
>   vhost_net: return VIRTIO_NET_S_ANNOUNCE if device model has it set
>   vdpa: Offer VIRTIO_NET_F_GUEST_ANNOUNCE feature if SVQ is enabled
>
>  include/hw/virtio/vhost-vdpa.h |  2 +
>  hw/net/vhost_net.c | 35 +++-
>  hw/virtio/vhost-vdpa.c |  8 ++--
>  net/vhost-vdpa.c   | 74 ++
>  4 files changed, 62 insertions(+), 57 deletions(-)
>
> --
> 2.31.1
>
>




Re: [RFC PATCH v2 2/8] vdpa: Save emulated features list in vhost_vdpa

2022-10-19 Thread Jason Wang
On Wed, Oct 19, 2022 at 8:52 PM Eugenio Pérez  wrote:
>
> At this moment only _F_LOG is added there.
>
> However, future patches add features that depend on the kind of device.
> In particular, only net devices can add VIRTIO_NET_F_GUEST_ANNOUNCE. So
> let's allow the vhost_vdpa creator to set custom emulated device features.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  include/hw/virtio/vhost-vdpa.h | 2 ++
>  hw/virtio/vhost-vdpa.c | 8 
>  net/vhost-vdpa.c   | 4 
>  3 files changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index d85643..50083e1e3b 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -31,6 +31,8 @@ typedef struct vhost_vdpa {
>  bool iotlb_batch_begin_sent;
>  MemoryListener listener;
>  struct vhost_vdpa_iova_range iova_range;
> +/* VirtIO device features that can be emulated by qemu */
> +uint64_t added_features;

Any reason we need per-vhost_vdpa storage for this? Or is there a
chance that this field could differ among the devices?

Thanks

>  uint64_t acked_features;
>  bool shadow_vqs_enabled;
>  /* IOVA mapping used by the Shadow Virtqueue */
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 7468e44b87..ddb5e29288 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -660,8 +660,8 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev,
>
>  v->acked_features = features;
>
> -/* We must not ack _F_LOG if SVQ is enabled */
> -features &= ~BIT_ULL(VHOST_F_LOG_ALL);
> +/* Do not ack features emulated by qemu */
> +features &= ~v->added_features;
>  }
>
>  trace_vhost_vdpa_set_features(dev, features);
> @@ -1244,8 +1244,8 @@ static int vhost_vdpa_get_features(struct vhost_dev 
> *dev,
>  int ret = vhost_vdpa_get_dev_features(dev, features);
>
>  if (ret == 0 && v->shadow_vqs_enabled) {
> -/* Add SVQ logging capabilities */
> -*features |= BIT_ULL(VHOST_F_LOG_ALL);
> +/* Add emulated capabilities */
> +*features |= v->added_features;
>  }
>
>  return ret;
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index eebf29f5c1..3803452800 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -599,6 +599,10 @@ static NetClientState 
> *net_vhost_vdpa_init(NetClientState *peer,
>  s->vhost_vdpa.index = queue_pair_index;
>  s->vhost_vdpa.shadow_vqs_enabled = svq;
>  s->vhost_vdpa.iova_tree = iova_tree;
> +if (svq) {
> +/* Add SVQ logging capabilities */
> +s->vhost_vdpa.added_features |= BIT_ULL(VHOST_F_LOG_ALL);
> +}
>  if (!is_datapath) {
>  s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
>  
> vhost_vdpa_net_cvq_cmd_page_len());
> --
> 2.31.1
>




Re: [PATCH v2 2/2] virtio-net: fix TX timer with tx_burst

2022-10-19 Thread Jason Wang
On Fri, Oct 14, 2022 at 9:20 PM Laurent Vivier  wrote:
>
> When virtio_net_flush_tx() reaches the tx_burst value, the
> queue is not fully flushed and nothing restarts the timer.
>
> Fix that by doing for the TX timer what we do for the bottom-half TX:
> rearm the timer if we find any packets to send during the
> virtio_net_flush_tx() call.
>
> Fixes: e3f30488e5f8 ("virtio-net: Limit number of packets sent per TX flush")
> Cc: alex.william...@redhat.com
> Signed-off-by: Laurent Vivier 
> ---
>  hw/net/virtio-net.c | 59 +++--
>  1 file changed, 46 insertions(+), 13 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 1fbf2f3e19a7..b4964b821021 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -2536,14 +2536,19 @@ static void virtio_net_tx_complete(NetClientState 
> *nc, ssize_t len)
>
>  virtio_queue_set_notification(q->tx_vq, 1);
>  ret = virtio_net_flush_tx(q);
> -if (q->tx_bh && ret >= n->tx_burst) {
> +if (ret >= n->tx_burst) {
>  /*
>   * the flush has been stopped by tx_burst
>   * we will not receive notification for the
>   * remainining part, so re-schedule
>   */
>  virtio_queue_set_notification(q->tx_vq, 0);
> -qemu_bh_schedule(q->tx_bh);
> +if (q->tx_bh) {
> +qemu_bh_schedule(q->tx_bh);
> +} else {
> +timer_mod(q->tx_timer,
> +  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> +}
>  q->tx_waiting = 1;
>  }
>  }
> @@ -2644,6 +2649,8 @@ drop:
>  return num_packets;
>  }
>
> +static void virtio_net_tx_timer(void *opaque);
> +
>  static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
>  {
>  VirtIONet *n = VIRTIO_NET(vdev);
> @@ -2661,18 +2668,17 @@ static void virtio_net_handle_tx_timer(VirtIODevice 
> *vdev, VirtQueue *vq)
>  }
>
>  if (q->tx_waiting) {
> -virtio_queue_set_notification(vq, 1);
> +/* We already have queued packets, immediately flush */
>  timer_del(q->tx_timer);
> -q->tx_waiting = 0;
> -if (virtio_net_flush_tx(q) == -EINVAL) {
> -return;
> -}
> -} else {
> -timer_mod(q->tx_timer,
> -   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 
> n->tx_timeout);
> -q->tx_waiting = 1;
> -virtio_queue_set_notification(vq, 0);
> +virtio_net_tx_timer(q);
> +return;
>  }
> +
> +/* re-arm timer to flush it (and more) on next tick */
> +timer_mod(q->tx_timer,
> +  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> +q->tx_waiting = 1;
> +virtio_queue_set_notification(vq, 0);
>  }

Nit: if we stick the above in the else, we can avoid a lot of changes.
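
I.e. keeping the original if/else shape, something like this (untested,
just re-nesting the statements that are already in the patch):

    if (q->tx_waiting) {
        /* We already have queued packets, immediately flush */
        timer_del(q->tx_timer);
        virtio_net_tx_timer(q);
    } else {
        /* re-arm timer to flush it (and more) on next tick */
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }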

Others look good.

Thanks

>
>  static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
> @@ -2702,6 +2708,8 @@ static void virtio_net_tx_timer(void *opaque)
>  VirtIONetQueue *q = opaque;
>  VirtIONet *n = q->n;
>  VirtIODevice *vdev = VIRTIO_DEVICE(n);
> +int ret;
> +
>  /* This happens when device was stopped but BH wasn't. */
>  if (!vdev->vm_running) {
>  /* Make sure tx waiting is set, so we'll run when restarted. */
> @@ -2716,8 +2724,33 @@ static void virtio_net_tx_timer(void *opaque)
>  return;
>  }
>
> +ret = virtio_net_flush_tx(q);
> +if (ret == -EBUSY || ret == -EINVAL) {
> +return;
> +}
> +/*
> + * If we flush a full burst of packets, assume there are
> + * more coming and immediately rearm
> + */
> +if (ret >= n->tx_burst) {
> +q->tx_waiting = 1;
> +timer_mod(q->tx_timer,
> +  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> +return;
> +}
> +/*
> + * If less than a full burst, re-enable notification and flush
> + * anything that may have come in while we weren't looking.  If
> + * we find something, assume the guest is still active and rearm
> + */
>  virtio_queue_set_notification(q->tx_vq, 1);
> -virtio_net_flush_tx(q);
> +ret = virtio_net_flush_tx(q);
> +if (ret > 0) {
> +virtio_queue_set_notification(q->tx_vq, 0);
> +q->tx_waiting = 1;
> +timer_mod(q->tx_timer,
> +  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> +}
>  }
>
>  static void virtio_net_tx_bh(void *opaque)
> --
> 2.37.3
>




Re: [RFC PATCH v2 1/8] vdpa: Delete duplicated vdpa_feature_bits entry

2022-10-19 Thread Jason Wang
On Wed, Oct 19, 2022 at 8:52 PM Eugenio Pérez  wrote:
>
> This entry was duplicated in the referenced commit. Remove it.
>
> Fixes: 402378407dbd ("vhost-vdpa: multiqueue support")
> Signed-off-by: Eugenio Pérez 

Acked-by: Jason Wang 

> ---
>  net/vhost-vdpa.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 4bc3fd01a8..eebf29f5c1 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -63,7 +63,6 @@ const int vdpa_feature_bits[] = {
>  VIRTIO_NET_F_CTRL_RX,
>  VIRTIO_NET_F_CTRL_RX_EXTRA,
>  VIRTIO_NET_F_CTRL_VLAN,
> -VIRTIO_NET_F_GUEST_ANNOUNCE,
>  VIRTIO_NET_F_CTRL_MAC_ADDR,
>  VIRTIO_NET_F_RSS,
>  VIRTIO_NET_F_MQ,
> --
> 2.31.1
>




Re: [PATCH] x86-iommu: Fail flag registration of DEVIOTLB if DT not supported

2022-10-19 Thread Jason Wang
On Wed, Oct 19, 2022 at 10:54 PM Peter Xu  wrote:
>
> On Wed, Oct 19, 2022 at 04:12:22PM +0200, Eric Auger wrote:
> > Hi Peter,
> >
> > On 10/19/22 16:01, Peter Xu wrote:
> > > Hi, Eric,
> > >
> > > On Wed, Oct 19, 2022 at 01:24:15PM +0200, Eric Auger wrote:
> > >>> @@ -1484,6 +1485,13 @@ static int 
> > >>> amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
> > >>> PCI_FUNC(as->devfn));
> > >>>  return -EINVAL;
> > >>>  }
> > >>> +
> > >>> +if ((new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) && 
> > >>> !x86_iommu->dt_supported) {
> > >>> +error_setg_errno(errp, ENOTSUP,
> > >>> + "Device-iotlb not declared support for 
> > >>> vIOMMU");
> > >> with current vhost code, vhost will then silently fall back to UNMAP
> > >> notifier registration and this will succeed. It would be nice to clarify
> > >> whether the vIOMMU works with vhost in this downgraded mode (at least
> > >> ats=off and device-iotlb=off)?
> > > I'm slightly confused, why do we need to clarify that?
> > >
> > > As we have discussed, if a device with ATS capability got attached to a
> > > vIOMMU context that does not support ATS, then it should just work as if
> > > without ATS, without any warning.  Isn't this the case here?
> >
> > Yes, that's the theory and what should happen at the bare-metal level.
> > However, I am not sure this is still true with the intel-iommu
> > emulation/vhost integration.
> > Remember we always assumed vhost was supported on intel with both ats=on
> > and device-iotlb=on, if I am correct.
> >
> > vhost/viommu integration requires unmap notifications to be properly
> > sent from the viommu to vhost, whether through DEVIOTLB_UNMAP or UNMAP
> > notifiers.
> > Does the intel-iommu/vhost integration work if both ats=off and
> > device-iotlb=off, or ats=on and device-iotlb=off? This I am not sure of.
> > I gave it a try and I got some errors, but maybe that's something else...
>
> Indeed it's not working.  Obviously my test on this patch is not complete,
> as I was testing with pings, and even after the patch is applied the ping
> can get lost after a few successful ones.  I should have tried harder.
>
> Yes, only device-iotlb=on & ats=on works in my case; the other three
> combinations won't work reliably or just don't work, but I do expect they
> should all work, so I definitely missed something here.  Jason, are you
> aware of the problem?

Haven't tried, but I guess there would be some issues with configurations
other than the suggested "ats=on, device-iotlb=on".

So we have:

1) ats=on, device-iotlb=on, this is the configuration that libvirt is
using and it should work
2) ats=off, device-iotlb=on: in this case, the DEVIOTLB_UNMAP
notifier registration will succeed but there won't be any device-iotlb
invalidation sent from the guest, so we will hit errors since there's
no way to flush the device IOTLB. According to the PCIe spec, the device
should still work (using untranslated transactions). In this case we
probably need a way to detect if device page fault (ATS) is enabled and
fall back to UNMAP if it isn't.
3) ats=on, device-iotlb=off: in this case, without your patch, it
won't work since the DEVIOTLB_UNMAP registration will succeed but the
guest won't enable ATS, so there will be no IOTLB invalidation. With your
patch, we fall back to UNMAP and I think it should then work.
4) ats=off, device-iotlb=off: similar to 3), it won't work without
your patch, but with your patch we fall back to UNMAP so it should
work.

Thanks

>
> Thanks,
>
> --
> Peter Xu
>




Re: [PATCH v2] vhost: Warn if DEVIOTLB_UNMAP is not supported and ats is set

2022-10-19 Thread Jason Wang
On Wed, Oct 19, 2022 at 8:27 PM Eric Auger  wrote:
>
> Since b68ba1ca5767 ("memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP
> IOMMUTLBNotificationType"), vhost attempts to register DEVIOTLB_UNMAP
> notifier. The latter is supported by the intel-iommu, which supports
> device-iotlb if the corresponding option is set. Then 958ec334bca3
> ("vhost: Unbreak SMMU and virtio-iommu on dev-iotlb support") allowed
> a silent fallback to the legacy UNMAP notifier if the viommu does not
> support device-iotlb.
>
> Initially the vhost/viommu integration was introduced with the intel iommu,
> assuming ats=on was set on the virtio-pci device and device-iotlb was set
> on the intel iommu. vhost acts as an ATS-capable device since it
> implements an IOTLB on the kernel side. However, translated transactions
> that hit the device IOTLB do not transit through the vIOMMU, so this
> requires only limited ATS support on the viommu side. Anyway, this assumed
> ATS was eventually enabled.
>
> But neither SMMUv3 nor virtio-iommu supports ATS, and the integration
> with vhost just relies on the fact that those vIOMMUs send UNMAP notifications
> whenever the guest triggers them. This works without ATS being enabled.
>
> This patch makes sure we get a warning if ATS is set on a device
> protected by virtio-iommu or vsmmuv3, as a reminder that we don't have
> full ATS support on those vIOMMUs and that setting ats=on on the
> virtio-pci end-point is not a requirement.
>
> Signed-off-by: Eric Auger 
>
> ---
>
> v1 -> v2:
> - s/enabled/capable
> - tweak the error message on vhost side
> ---
>  include/hw/virtio/virtio-bus.h |  3 +++
>  hw/virtio/vhost.c  | 21 -
>  hw/virtio/virtio-bus.c | 14 ++
>  hw/virtio/virtio-pci.c | 11 +++
>  4 files changed, 48 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
> index 7ab8c9dab0..23360a1daa 100644
> --- a/include/hw/virtio/virtio-bus.h
> +++ b/include/hw/virtio/virtio-bus.h
> @@ -94,6 +94,7 @@ struct VirtioBusClass {
>  bool has_variable_vring_alignment;
>  AddressSpace *(*get_dma_as)(DeviceState *d);
>  bool (*iommu_enabled)(DeviceState *d);
> +bool (*ats_capable)(DeviceState *d);
>  };
>
>  struct VirtioBusState {
> @@ -157,4 +158,6 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int 
> n, bool assign);
>  void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n);
>  /* Whether the IOMMU is enabled for this device */
>  bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev);
> +/* Whether ATS is enabled for this device */
> +bool virtio_bus_device_ats_capable(VirtIODevice *vdev);
>  #endif /* VIRTIO_BUS_H */
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 5185c15295..3cf9efce5e 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -324,6 +324,16 @@ static bool vhost_dev_has_iommu(struct vhost_dev *dev)
>  }
>  }
>
> +static bool vhost_dev_ats_capable(struct vhost_dev *dev)

I suggest renaming this to pf_capable() since ATS is PCI-specific but
vhost isn't.

> +{
> +VirtIODevice *vdev = dev->vdev;
> +
> +if (vdev && virtio_bus_device_ats_capable(vdev)) {
> +return true;
> +}
> +return false;
> +}
> +
>  static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
>hwaddr *plen, bool is_write)
>  {
> @@ -737,6 +747,7 @@ static void vhost_iommu_region_add(MemoryListener 
> *listener,
>  Int128 end;
>  int iommu_idx;
>  IOMMUMemoryRegion *iommu_mr;
> +Error *err = NULL;
>  int ret;
>
>  if (!memory_region_is_iommu(section->mr)) {
> @@ -760,8 +771,16 @@ static void vhost_iommu_region_add(MemoryListener 
> *listener,
>  iommu->iommu_offset = section->offset_within_address_space -
>section->offset_within_region;
>  iommu->hdev = dev;
> -ret = memory_region_register_iommu_notifier(section->mr, &iommu->n,
> NULL);
> +ret = memory_region_register_iommu_notifier(section->mr, &iommu->n,
> &err);
>  if (ret) {
> +if (vhost_dev_ats_capable(dev)) {
> +error_reportf_err(err,
> +  "%s: Although the device exposes ATS 
> capability, "
> +  "fallback to legacy IOMMU UNMAP notifier: ",
> +  iommu_mr->parent_obj.name);

I'm not sure if it's a real error. I wonder if what we need to do is,
roughly as sketched below:

1) check if ATS is enabled
2) fall back to UNMAP if ATS is not enabled
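
Something along these lines, as a rough sketch only; vhost_dev_ats_enabled()
is a placeholder for a helper we don't have today that would tell whether
the guest actually enabled ATS on the device:

    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, &err);
    if (ret) {
        if (vhost_dev_ats_enabled(dev)) {
            /* the guest really relies on ATS: report the failure */
            error_report_err(err);
        } else {
            /* ATS never enabled by the guest: fall back silently */
            error_free(err);
        }
        /* then fall back to the legacy UNMAP notifier as we do today */
    }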

> +} else {
> +error_free(err);
> +}
>  /*
>   * Some vIOMMUs do not support dev-iotlb yet.  If so, try to use the
>   * UNMAP legacy message
> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> index 896feb37a1..d46c3f8ec4 100644
> --- a/hw/virtio/virtio-bus.c
> +++ b/hw/virtio/virtio-bus.c
> @@ -348,6 +348,20 @@ bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev)
>  return klass->iommu_enabled(qbus->parent);
>  }
>
> +bool 

[PATCH v8 1/9] target/arm: Introduce curr_insn_len

2022-10-19 Thread Richard Henderson
A simple helper to retrieve the length of the current insn.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/arm/translate.h | 5 +
 target/arm/translate-vfp.c | 2 +-
 target/arm/translate.c | 5 ++---
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/target/arm/translate.h b/target/arm/translate.h
index af5d4a7086..90bf7c57fc 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -226,6 +226,11 @@ static inline void disas_set_insn_syndrome(DisasContext 
*s, uint32_t syn)
 s->insn_start = NULL;
 }
 
+static inline int curr_insn_len(DisasContext *s)
+{
+return s->base.pc_next - s->pc_curr;
+}
+
 /* is_jmp field values */
 #define DISAS_JUMP  DISAS_TARGET_0 /* only pc was modified dynamically */
 /* CPU state was modified dynamically; exit to main loop for interrupts. */
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
index bd5ae27d09..94cc1e4b77 100644
--- a/target/arm/translate-vfp.c
+++ b/target/arm/translate-vfp.c
@@ -242,7 +242,7 @@ static bool vfp_access_check_a(DisasContext *s, bool 
ignore_vfp_enabled)
 if (s->sme_trap_nonstreaming) {
 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
syn_smetrap(SME_ET_Streaming,
-   s->base.pc_next - s->pc_curr == 2));
+   curr_insn_len(s) == 2));
 return false;
 }
 
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 2f72afe019..5752b7af5c 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -6650,7 +6650,7 @@ static ISSInfo make_issinfo(DisasContext *s, int rd, bool 
p, bool w)
 /* ISS not valid if writeback */
 if (p && !w) {
 ret = rd;
-if (s->base.pc_next - s->pc_curr == 2) {
+if (curr_insn_len(s) == 2) {
 ret |= ISSIs16Bit;
 }
 } else {
@@ -9812,8 +9812,7 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, 
CPUState *cpu)
 /* nothing more to generate */
 break;
 case DISAS_WFI:
-gen_helper_wfi(cpu_env,
-   tcg_constant_i32(dc->base.pc_next - dc->pc_curr));
+gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
 /*
  * The helper doesn't necessarily throw an exception, but we
  * must go back to the main loop to check for interrupts anyway.
-- 
2.34.1




[PATCH v8 9/9] target/arm: Enable TARGET_TB_PCREL

2022-10-19 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
v7: Introduce DisasLabel to clean up pc_save frobbing.
Adjust pc_save around tcg_remove_ops_after.
---
 target/arm/cpu-param.h|   1 +
 target/arm/translate.h|  50 +++-
 target/arm/cpu.c  |  23 
 target/arm/translate-a64.c|  64 +---
 target/arm/translate-m-nocp.c |   2 +-
 target/arm/translate.c| 108 +++---
 6 files changed, 177 insertions(+), 71 deletions(-)

diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h
index 08681828ac..ae472cf330 100644
--- a/target/arm/cpu-param.h
+++ b/target/arm/cpu-param.h
@@ -30,6 +30,7 @@
  */
 # define TARGET_PAGE_BITS_VARY
 # define TARGET_PAGE_BITS_MIN  10
+# define TARGET_TB_PCREL 1
 #endif
 
 #define NB_MMU_MODES 8
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 4aa239e23c..3cdc7dbc2f 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -6,18 +6,42 @@
 
 
 /* internal defines */
+
+/*
+ * Save pc_save across a branch, so that we may restore the value from
+ * before the branch at the point the label is emitted.
+ */
+typedef struct DisasLabel {
+TCGLabel *label;
+target_ulong pc_save;
+} DisasLabel;
+
 typedef struct DisasContext {
 DisasContextBase base;
 const ARMISARegisters *isar;
 
 /* The address of the current instruction being translated. */
 target_ulong pc_curr;
+/*
+ * For TARGET_TB_PCREL, the full value of cpu_pc is not known
+ * (although the page offset is known).  For convenience, the
+ * translation loop uses the full virtual address that triggered
+ * the translation, from base.pc_start through pc_curr.
+ * For efficiency, we do not update cpu_pc for every instruction.
+ * Instead, pc_save has the value of pc_curr at the time of the
+ * last update to cpu_pc, which allows us to compute the addend
+ * needed to bring cpu_pc current: pc_curr - pc_save.
+ * If cpu_pc now contains the destination of an indirect branch,
+ * pc_save contains -1 to indicate that relative updates are no
+ * longer possible.
+ */
+target_ulong pc_save;
 target_ulong page_start;
 uint32_t insn;
 /* Nonzero if this instruction has been conditionally skipped.  */
 int condjmp;
 /* The label that will be jumped to when the instruction is skipped.  */
-TCGLabel *condlabel;
+DisasLabel condlabel;
 /* Thumb-2 conditional execution bits.  */
 int condexec_mask;
 int condexec_cond;
@@ -28,8 +52,6 @@ typedef struct DisasContext {
  * after decode (ie after any UNDEF checks)
  */
 bool eci_handled;
-/* TCG op to rewind to if this turns out to be an invalid ECI state */
-TCGOp *insn_eci_rewind;
 int sctlr_b;
 MemOp be_data;
 #if !defined(CONFIG_USER_ONLY)
@@ -566,6 +588,28 @@ static inline MemOp finalize_memop(DisasContext *s, MemOp 
opc)
  */
 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
 
+/*
+ * gen_disas_label:
+ * Create a label and cache a copy of pc_save.
+ */
+static inline DisasLabel gen_disas_label(DisasContext *s)
+{
+return (DisasLabel){
+.label = gen_new_label(),
+.pc_save = s->pc_save,
+};
+}
+
+/*
+ * set_disas_label:
+ * Emit a label and restore the cached copy of pc_save.
+ */
+static inline void set_disas_label(DisasContext *s, DisasLabel l)
+{
+gen_set_label(l.label);
+s->pc_save = l.pc_save;
+}
+
 /*
  * Helpers for implementing sets of trans_* functions.
  * Defer the implementation of NAME to FUNC, with optional extra arguments.
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 94ca6f163f..0bc5e9b125 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -76,17 +76,18 @@ static vaddr arm_cpu_get_pc(CPUState *cs)
 void arm_cpu_synchronize_from_tb(CPUState *cs,
  const TranslationBlock *tb)
 {
-ARMCPU *cpu = ARM_CPU(cs);
> -CPUARMState *env = &cpu->env;
-
-/*
- * It's OK to look at env for the current mode here, because it's
- * never possible for an AArch64 TB to chain to an AArch32 TB.
- */
-if (is_a64(env)) {
-env->pc = tb_pc(tb);
-} else {
-env->regs[15] = tb_pc(tb);
+/* The program counter is always up to date with TARGET_TB_PCREL. */
+if (!TARGET_TB_PCREL) {
+CPUARMState *env = cs->env_ptr;
+/*
+ * It's OK to look at env for the current mode here, because it's
+ * never possible for an AArch64 TB to chain to an AArch32 TB.
+ */
+if (is_a64(env)) {
+env->pc = tb_pc(tb);
+} else {
+env->regs[15] = tb_pc(tb);
+}
 }
 }
 #endif /* CONFIG_TCG */
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index f9f8559c01..32e95cc2f4 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -142,12 +142,18 @@ static void reset_btype(DisasContext *s)
 
 static void gen_pc_plus_diff(DisasContext 

[PATCH v8 4/9] target/arm: Change gen_exception_insn* to work on displacements

2022-10-19 Thread Richard Henderson
In preparation for TARGET_TB_PCREL, reduce reliance on absolute values.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/arm/translate.h|  5 +++--
 target/arm/translate-a64.c| 28 ++-
 target/arm/translate-m-nocp.c |  6 ++---
 target/arm/translate-mve.c|  2 +-
 target/arm/translate-vfp.c|  6 ++---
 target/arm/translate.c| 42 +--
 6 files changed, 43 insertions(+), 46 deletions(-)

diff --git a/target/arm/translate.h b/target/arm/translate.h
index d651044855..4aa239e23c 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -281,9 +281,10 @@ void arm_jump_cc(DisasCompare *cmp, TCGLabel *label);
 void arm_gen_test_cc(int cc, TCGLabel *label);
 MemOp pow2_align(unsigned i);
 void unallocated_encoding(DisasContext *s);
-void gen_exception_insn_el(DisasContext *s, uint64_t pc, int excp,
+void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
uint32_t syn, uint32_t target_el);
-void gen_exception_insn(DisasContext *s, uint64_t pc, int excp, uint32_t syn);
+void gen_exception_insn(DisasContext *s, target_long pc_diff,
+int excp, uint32_t syn);
 
 /* Return state of Alternate Half-precision flag, caller frees result */
 static inline TCGv_i32 get_ahp_flag(void)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 585d42d5b2..49380e1cfe 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1155,7 +1155,7 @@ static bool fp_access_check_only(DisasContext *s)
 assert(!s->fp_access_checked);
 s->fp_access_checked = true;
 
-gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF,
+gen_exception_insn_el(s, 0, EXCP_UDEF,
   syn_fp_access_trap(1, 0xe, false, 0),
   s->fp_excp_el);
 return false;
@@ -1170,7 +1170,7 @@ static bool fp_access_check(DisasContext *s)
 return false;
 }
 if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
-gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+gen_exception_insn(s, 0, EXCP_UDEF,
syn_smetrap(SME_ET_Streaming, false));
 return false;
 }
@@ -1190,7 +1190,7 @@ bool sve_access_check(DisasContext *s)
 goto fail_exit;
 }
 } else if (s->sve_excp_el) {
-gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF,
+gen_exception_insn_el(s, 0, EXCP_UDEF,
   syn_sve_access_trap(), s->sve_excp_el);
 goto fail_exit;
 }
@@ -1212,7 +1212,7 @@ bool sve_access_check(DisasContext *s)
 static bool sme_access_check(DisasContext *s)
 {
 if (s->sme_excp_el) {
-gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF,
+gen_exception_insn_el(s, 0, EXCP_UDEF,
   syn_smetrap(SME_ET_AccessTrap, false),
   s->sme_excp_el);
 return false;
@@ -1242,12 +1242,12 @@ bool sme_enabled_check_with_svcr(DisasContext *s, 
unsigned req)
 return false;
 }
 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
-gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+gen_exception_insn(s, 0, EXCP_UDEF,
syn_smetrap(SME_ET_NotStreaming, false));
 return false;
 }
 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
-gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+gen_exception_insn(s, 0, EXCP_UDEF,
syn_smetrap(SME_ET_InactiveZA, false));
 return false;
 }
@@ -1907,7 +1907,7 @@ static void gen_sysreg_undef(DisasContext *s, bool isread,
 } else {
 syndrome = syn_uncategorized();
 }
-gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syndrome);
+gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
 }
 
 /* MRS - move from system register
@@ -2161,8 +2161,7 @@ static void disas_exc(DisasContext *s, uint32_t insn)
 switch (op2_ll) {
 case 1: /* SVC */
 gen_ss_advance(s);
-gen_exception_insn(s, s->base.pc_next, EXCP_SWI,
-   syn_aa64_svc(imm16));
+gen_exception_insn(s, 4, EXCP_SWI, syn_aa64_svc(imm16));
 break;
 case 2: /* HVC */
 if (s->current_el == 0) {
@@ -2175,8 +2174,7 @@ static void disas_exc(DisasContext *s, uint32_t insn)
 gen_a64_update_pc(s, 0);
 gen_helper_pre_hvc(cpu_env);
 gen_ss_advance(s);
-gen_exception_insn_el(s, s->base.pc_next, EXCP_HVC,
-  syn_aa64_hvc(imm16), 2);
+gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2);
 break;
 case 3: /* SMC */
 if (s->current_el == 

Re: [PATCH v4 13/24] target/arm: Add ptw_idx to S1Translate

2022-10-19 Thread Richard Henderson

On 10/17/22 20:01, Peter Maydell wrote:

On Tue, 11 Oct 2022 at 04:30, Richard Henderson
 wrote:


Hoist the computation of the mmu_idx for the ptw up to
get_phys_addr_with_struct and get_phys_addr_twostage.
This removes the duplicate check for stage2 disabled
from the middle of the walk, performing it only once.

Signed-off-by: Richard Henderson 
---
+if (!nstable) {
+/* Stage2_S -> Stage2 or Phys_S -> Phys_NS */
+ptw->in_ptw_idx &= ~1;
+ptw->in_secure = false;
+}


I feel like this bitwise manipulation of the mmuidx values
is leaving a landmine for any future re-organization of
our mmuidx use. Can we do this just using the symbolic
constant values?


I can't think of a way with just symbolic values,
but I can add BUILD_BUG_ON to validate expectations,
so that it's not a *hidden* landmine.  :-)
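
E.g. something like this, just sketching the shape of the relation
that the "&= ~1" conversion relies on:

    QEMU_BUILD_BUG_ON((ARMMMUIdx_Stage2_S & ~1) != ARMMMUIdx_Stage2);
    QEMU_BUILD_BUG_ON((ARMMMUIdx_Phys_S & ~1) != ARMMMUIdx_Phys_NS);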


r~



[PATCH v8 3/9] target/arm: Change gen_*set_pc_im to gen_*update_pc

2022-10-19 Thread Richard Henderson
In preparation for TARGET_TB_PCREL, reduce reliance on
absolute values by passing in pc difference.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/arm/translate-a32.h |  2 +-
 target/arm/translate.h |  6 ++--
 target/arm/translate-a64.c | 32 +-
 target/arm/translate-vfp.c |  2 +-
 target/arm/translate.c | 68 --
 5 files changed, 56 insertions(+), 54 deletions(-)

diff --git a/target/arm/translate-a32.h b/target/arm/translate-a32.h
index 78a84c1414..5339c22f1e 100644
--- a/target/arm/translate-a32.h
+++ b/target/arm/translate-a32.h
@@ -40,7 +40,7 @@ void write_neon_element64(TCGv_i64 src, int reg, int ele, 
MemOp memop);
 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs);
 void gen_set_cpsr(TCGv_i32 var, uint32_t mask);
 void gen_set_condexec(DisasContext *s);
-void gen_set_pc_im(DisasContext *s, target_ulong val);
+void gen_update_pc(DisasContext *s, target_long diff);
 void gen_lookup_tb(DisasContext *s);
 long vfp_reg_offset(bool dp, unsigned reg);
 long neon_full_reg_offset(unsigned reg);
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 90bf7c57fc..d651044855 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -254,7 +254,7 @@ static inline int curr_insn_len(DisasContext *s)
  * For instructions which want an immediate exit to the main loop, as opposed
  * to attempting to use lookup_and_goto_ptr.  Unlike DISAS_UPDATE_EXIT, this
  * doesn't write the PC on exiting the translation loop so you need to ensure
- * something (gen_a64_set_pc_im or runtime helper) has done so before we reach
+ * something (gen_a64_update_pc or runtime helper) has done so before we reach
  * return from cpu_tb_exec.
  */
 #define DISAS_EXIT  DISAS_TARGET_9
@@ -263,14 +263,14 @@ static inline int curr_insn_len(DisasContext *s)
 
 #ifdef TARGET_AARCH64
 void a64_translate_init(void);
-void gen_a64_set_pc_im(uint64_t val);
+void gen_a64_update_pc(DisasContext *s, target_long diff);
 extern const TranslatorOps aarch64_translator_ops;
 #else
 static inline void a64_translate_init(void)
 {
 }
 
-static inline void gen_a64_set_pc_im(uint64_t val)
+static inline void gen_a64_update_pc(DisasContext *s, target_long diff)
 {
 }
 #endif
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 6a372ed184..585d42d5b2 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -140,9 +140,9 @@ static void reset_btype(DisasContext *s)
 }
 }
 
-void gen_a64_set_pc_im(uint64_t val)
+void gen_a64_update_pc(DisasContext *s, target_long diff)
 {
-tcg_gen_movi_i64(cpu_pc, val);
+tcg_gen_movi_i64(cpu_pc, s->pc_curr + diff);
 }
 
 /*
@@ -334,14 +334,14 @@ static void gen_exception_internal(int excp)
 
 static void gen_exception_internal_insn(DisasContext *s, uint64_t pc, int excp)
 {
-gen_a64_set_pc_im(pc);
+gen_a64_update_pc(s, pc - s->pc_curr);
 gen_exception_internal(excp);
 s->base.is_jmp = DISAS_NORETURN;
 }
 
 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
 {
-gen_a64_set_pc_im(s->pc_curr);
+gen_a64_update_pc(s, 0);
 gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
 s->base.is_jmp = DISAS_NORETURN;
 }
@@ -376,11 +376,11 @@ static void gen_goto_tb(DisasContext *s, int n, int64_t 
diff)
 
 if (use_goto_tb(s, dest)) {
 tcg_gen_goto_tb(n);
-gen_a64_set_pc_im(dest);
+gen_a64_update_pc(s, diff);
 tcg_gen_exit_tb(s->base.tb, n);
 s->base.is_jmp = DISAS_NORETURN;
 } else {
-gen_a64_set_pc_im(dest);
+gen_a64_update_pc(s, diff);
 if (s->ss_active) {
 gen_step_complete_exception(s);
 } else {
@@ -1952,7 +1952,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, 
bool isread,
 uint32_t syndrome;
 
 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
-gen_a64_set_pc_im(s->pc_curr);
+gen_a64_update_pc(s, 0);
 gen_helper_access_check_cp_reg(cpu_env,
tcg_constant_ptr(ri),
tcg_constant_i32(syndrome),
@@ -1962,7 +1962,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, 
bool isread,
  * The readfn or writefn might raise an exception;
  * synchronize the CPU state in case it does.
  */
-gen_a64_set_pc_im(s->pc_curr);
+gen_a64_update_pc(s, 0);
 }
 
 /* Handle special cases first */
@@ -2172,7 +2172,7 @@ static void disas_exc(DisasContext *s, uint32_t insn)
 /* The pre HVC helper handles cases when HVC gets trapped
  * as an undefined insn by runtime configuration.
  */
-gen_a64_set_pc_im(s->pc_curr);
+gen_a64_update_pc(s, 0);
 gen_helper_pre_hvc(cpu_env);
 gen_ss_advance(s);
 gen_exception_insn_el(s, s->base.pc_next, 

[PATCH v8 0/9] target/arm: pc-relative translation blocks

2022-10-19 Thread Richard Henderson
These are the Arm-specific changes required to reduce the
amount of translation needed under address space randomization.

Changes for v8:
  * Add a comment to both gen_goto_tb concerning the ordering
of the pc update vs the goto_tb opcode.

r~

Richard Henderson (9):
  target/arm: Introduce curr_insn_len
  target/arm: Change gen_goto_tb to work on displacements
  target/arm: Change gen_*set_pc_im to gen_*update_pc
  target/arm: Change gen_exception_insn* to work on displacements
  target/arm: Remove gen_exception_internal_insn pc argument
  target/arm: Change gen_jmp* to work on displacements
  target/arm: Introduce gen_pc_plus_diff for aarch64
  target/arm: Introduce gen_pc_plus_diff for aarch32
  target/arm: Enable TARGET_TB_PCREL

 target/arm/cpu-param.h|   1 +
 target/arm/translate-a32.h|   2 +-
 target/arm/translate.h|  66 +++-
 target/arm/cpu.c  |  23 +--
 target/arm/translate-a64.c| 199 ++--
 target/arm/translate-m-nocp.c |   8 +-
 target/arm/translate-mve.c|   2 +-
 target/arm/translate-vfp.c|  10 +-
 target/arm/translate.c| 284 --
 9 files changed, 367 insertions(+), 228 deletions(-)

-- 
2.34.1




[PATCH v8 2/9] target/arm: Change gen_goto_tb to work on displacements

2022-10-19 Thread Richard Henderson
In preparation for TARGET_TB_PCREL, reduce reliance on absolute values.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 40 --
 target/arm/translate.c | 10 ++
 2 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 5b67375f4e..6a372ed184 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -370,8 +370,10 @@ static inline bool use_goto_tb(DisasContext *s, uint64_t 
dest)
>  return translator_use_goto_tb(&s->base, dest);
 }
 
-static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
+static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
 {
+uint64_t dest = s->pc_curr + diff;
+
 if (use_goto_tb(s, dest)) {
 tcg_gen_goto_tb(n);
 gen_a64_set_pc_im(dest);
@@ -1354,7 +1356,7 @@ static inline AArch64DecodeFn *lookup_disas_fn(const 
AArch64DecodeTable *table,
  */
 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
 {
-uint64_t addr = s->pc_curr + sextract32(insn, 0, 26) * 4;
+int64_t diff = sextract32(insn, 0, 26) * 4;
 
 if (insn & (1U << 31)) {
 /* BL Branch with link */
@@ -1363,7 +1365,7 @@ static void disas_uncond_b_imm(DisasContext *s, uint32_t 
insn)
 
 /* B Branch / BL Branch with link */
 reset_btype(s);
-gen_goto_tb(s, 0, addr);
+gen_goto_tb(s, 0, diff);
 }
 
 /* Compare and branch (immediate)
@@ -1375,14 +1377,14 @@ static void disas_uncond_b_imm(DisasContext *s, 
uint32_t insn)
 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
 {
 unsigned int sf, op, rt;
-uint64_t addr;
+int64_t diff;
 TCGLabel *label_match;
 TCGv_i64 tcg_cmp;
 
 sf = extract32(insn, 31, 1);
 op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
 rt = extract32(insn, 0, 5);
-addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
+diff = sextract32(insn, 5, 19) * 4;
 
 tcg_cmp = read_cpu_reg(s, rt, sf);
 label_match = gen_new_label();
@@ -1391,9 +1393,9 @@ static void disas_comp_b_imm(DisasContext *s, uint32_t 
insn)
 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
 tcg_cmp, 0, label_match);
 
-gen_goto_tb(s, 0, s->base.pc_next);
+gen_goto_tb(s, 0, 4);
 gen_set_label(label_match);
-gen_goto_tb(s, 1, addr);
+gen_goto_tb(s, 1, diff);
 }
 
 /* Test and branch (immediate)
@@ -1405,13 +1407,13 @@ static void disas_comp_b_imm(DisasContext *s, uint32_t 
insn)
 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
 {
 unsigned int bit_pos, op, rt;
-uint64_t addr;
+int64_t diff;
 TCGLabel *label_match;
 TCGv_i64 tcg_cmp;
 
 bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
 op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
-addr = s->pc_curr + sextract32(insn, 5, 14) * 4;
+diff = sextract32(insn, 5, 14) * 4;
 rt = extract32(insn, 0, 5);
 
 tcg_cmp = tcg_temp_new_i64();
@@ -1422,9 +1424,9 @@ static void disas_test_b_imm(DisasContext *s, uint32_t 
insn)
 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
 tcg_cmp, 0, label_match);
 tcg_temp_free_i64(tcg_cmp);
-gen_goto_tb(s, 0, s->base.pc_next);
+gen_goto_tb(s, 0, 4);
 gen_set_label(label_match);
-gen_goto_tb(s, 1, addr);
+gen_goto_tb(s, 1, diff);
 }
 
 /* Conditional branch (immediate)
@@ -1436,13 +1438,13 @@ static void disas_test_b_imm(DisasContext *s, uint32_t 
insn)
 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
 {
 unsigned int cond;
-uint64_t addr;
+int64_t diff;
 
 if ((insn & (1 << 4)) || (insn & (1 << 24))) {
 unallocated_encoding(s);
 return;
 }
-addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
+diff = sextract32(insn, 5, 19) * 4;
 cond = extract32(insn, 0, 4);
 
 reset_btype(s);
@@ -1450,12 +1452,12 @@ static void disas_cond_b_imm(DisasContext *s, uint32_t 
insn)
 /* genuinely conditional branches */
 TCGLabel *label_match = gen_new_label();
 arm_gen_test_cc(cond, label_match);
-gen_goto_tb(s, 0, s->base.pc_next);
+gen_goto_tb(s, 0, 4);
 gen_set_label(label_match);
-gen_goto_tb(s, 1, addr);
+gen_goto_tb(s, 1, diff);
 } else {
 /* 0xe and 0xf are both "always" conditions */
-gen_goto_tb(s, 0, addr);
+gen_goto_tb(s, 0, diff);
 }
 }
 
@@ -1629,7 +1631,7 @@ static void handle_sync(DisasContext *s, uint32_t insn,
  * any pending interrupts immediately.
  */
 reset_btype(s);
-gen_goto_tb(s, 0, s->base.pc_next);
+gen_goto_tb(s, 0, 4);
 return;
 
 case 7: /* SB */
@@ -1641,7 +1643,7 @@ static void handle_sync(DisasContext *s, uint32_t insn,
  * MB and end the TB instead.
  */
 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
-gen_goto_tb(s, 0, s->base.pc_next);
+  

[PATCH v8 5/9] target/arm: Remove gen_exception_internal_insn pc argument

2022-10-19 Thread Richard Henderson
In preparation for TARGET_TB_PCREL, reduce reliance on absolute values.
Since we always pass dc->pc_curr, fold the arithmetic to zero displacement.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c |  6 +++---
 target/arm/translate.c | 10 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 49380e1cfe..623f7e2e96 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -332,9 +332,9 @@ static void gen_exception_internal(int excp)
 gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
 }
 
-static void gen_exception_internal_insn(DisasContext *s, uint64_t pc, int excp)
+static void gen_exception_internal_insn(DisasContext *s, int excp)
 {
-gen_a64_update_pc(s, pc - s->pc_curr);
+gen_a64_update_pc(s, 0);
 gen_exception_internal(excp);
 s->base.is_jmp = DISAS_NORETURN;
 }
@@ -2211,7 +2211,7 @@ static void disas_exc(DisasContext *s, uint32_t insn)
  * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
  */
 if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) {
-gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
+gen_exception_internal_insn(s, EXCP_SEMIHOST);
 } else {
 unallocated_encoding(s);
 }
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 350f991649..9104ab8232 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -1074,10 +1074,10 @@ static inline void gen_smc(DisasContext *s)
 s->base.is_jmp = DISAS_SMC;
 }
 
-static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
+static void gen_exception_internal_insn(DisasContext *s, int excp)
 {
 gen_set_condexec(s);
-gen_update_pc(s, pc - s->pc_curr);
+gen_update_pc(s, 0);
 gen_exception_internal(excp);
 s->base.is_jmp = DISAS_NORETURN;
 }
@@ -1169,7 +1169,7 @@ static inline void gen_hlt(DisasContext *s, int imm)
  */
 if (semihosting_enabled(s->current_el != 0) &&
 (imm == (s->thumb ? 0x3c : 0xf000))) {
-gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
+gen_exception_internal_insn(s, EXCP_SEMIHOST);
 return;
 }
 
@@ -6556,7 +6556,7 @@ static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
 if (arm_dc_feature(s, ARM_FEATURE_M) &&
 semihosting_enabled(s->current_el == 0) &&
 (a->imm == 0xab)) {
-gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
+gen_exception_internal_insn(s, EXCP_SEMIHOST);
 } else {
 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
 }
@@ -8762,7 +8762,7 @@ static bool trans_SVC(DisasContext *s, arg_SVC *a)
 if (!arm_dc_feature(s, ARM_FEATURE_M) &&
 semihosting_enabled(s->current_el == 0) &&
 (a->imm == semihost_imm)) {
-gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
+gen_exception_internal_insn(s, EXCP_SEMIHOST);
 } else {
 gen_update_pc(s, curr_insn_len(s));
 s->svc_imm = a->imm;
-- 
2.34.1




[PATCH v8 7/9] target/arm: Introduce gen_pc_plus_diff for aarch64

2022-10-19 Thread Richard Henderson
In preparation for TARGET_TB_PCREL, reduce reliance on absolute values.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 41 +++---
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 623f7e2e96..f9f8559c01 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -140,9 +140,14 @@ static void reset_btype(DisasContext *s)
 }
 }
 
+static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
+{
+tcg_gen_movi_i64(dest, s->pc_curr + diff);
+}
+
 void gen_a64_update_pc(DisasContext *s, target_long diff)
 {
-tcg_gen_movi_i64(cpu_pc, s->pc_curr + diff);
+gen_pc_plus_diff(s, cpu_pc, diff);
 }
 
 /*
@@ -1360,7 +1365,7 @@ static void disas_uncond_b_imm(DisasContext *s, uint32_t 
insn)
 
 if (insn & (1U << 31)) {
 /* BL Branch with link */
-tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
+gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
 }
 
 /* B Branch / BL Branch with link */
@@ -2301,11 +2306,17 @@ static void disas_uncond_b_reg(DisasContext *s, 
uint32_t insn)
 default:
 goto do_unallocated;
 }
-gen_a64_set_pc(s, dst);
 /* BLR also needs to load return address */
 if (opc == 1) {
-tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
+TCGv_i64 lr = cpu_reg(s, 30);
+if (dst == lr) {
+TCGv_i64 tmp = new_tmp_a64(s);
+tcg_gen_mov_i64(tmp, dst);
+dst = tmp;
+}
+gen_pc_plus_diff(s, lr, curr_insn_len(s));
 }
+gen_a64_set_pc(s, dst);
 break;
 
 case 8: /* BRAA */
@@ -2328,11 +2339,17 @@ static void disas_uncond_b_reg(DisasContext *s, 
uint32_t insn)
 } else {
 dst = cpu_reg(s, rn);
 }
-gen_a64_set_pc(s, dst);
 /* BLRAA also needs to load return address */
 if (opc == 9) {
-tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
+TCGv_i64 lr = cpu_reg(s, 30);
+if (dst == lr) {
+TCGv_i64 tmp = new_tmp_a64(s);
+tcg_gen_mov_i64(tmp, dst);
+dst = tmp;
+}
+gen_pc_plus_diff(s, lr, curr_insn_len(s));
 }
+gen_a64_set_pc(s, dst);
 break;
 
 case 4: /* ERET */
@@ -2900,7 +2917,8 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn)
 
 tcg_rt = cpu_reg(s, rt);
 
-clean_addr = tcg_constant_i64(s->pc_curr + imm);
+clean_addr = new_tmp_a64(s);
+gen_pc_plus_diff(s, clean_addr, imm);
 if (is_vector) {
 do_fp_ld(s, rt, clean_addr, size);
 } else {
@@ -4244,23 +4262,22 @@ static void disas_ldst(DisasContext *s, uint32_t insn)
 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
 {
 unsigned int page, rd;
-uint64_t base;
-uint64_t offset;
+int64_t offset;
 
 page = extract32(insn, 31, 1);
 /* SignExtend(immhi:immlo) -> offset */
 offset = sextract64(insn, 5, 19);
 offset = offset << 2 | extract32(insn, 29, 2);
 rd = extract32(insn, 0, 5);
-base = s->pc_curr;
 
 if (page) {
 /* ADRP (page based) */
-base &= ~0xfff;
 offset <<= 12;
+/* The page offset is ok for TARGET_TB_PCREL. */
+offset -= s->pc_curr & 0xfff;
 }
 
-tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
+gen_pc_plus_diff(s, cpu_reg(s, rd), offset);
 }
 
 /*
-- 
2.34.1




[PATCH v8 8/9] target/arm: Introduce gen_pc_plus_diff for aarch32

2022-10-19 Thread Richard Henderson
In preparation for TARGET_TB_PCREL, reduce reliance on absolute values.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/arm/translate.c | 38 +-
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/target/arm/translate.c b/target/arm/translate.c
index ca128edab7..5f6bd9b5b7 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -260,23 +260,22 @@ static inline int get_a32_user_mem_index(DisasContext *s)
 }
 }
 
-/* The architectural value of PC.  */
-static uint32_t read_pc(DisasContext *s)
-{
-return s->pc_curr + (s->thumb ? 4 : 8);
-}
-
 /* The pc_curr difference for an architectural jump. */
 static target_long jmp_diff(DisasContext *s, target_long diff)
 {
 return diff + (s->thumb ? 4 : 8);
 }
 
+static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
+{
+tcg_gen_movi_i32(var, s->pc_curr + diff);
+}
+
 /* Set a variable to the value of a CPU register.  */
 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
 {
 if (reg == 15) {
-tcg_gen_movi_i32(var, read_pc(s));
+gen_pc_plus_diff(s, var, jmp_diff(s, 0));
 } else {
 tcg_gen_mov_i32(var, cpu_R[reg]);
 }
@@ -292,7 +291,11 @@ TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
 TCGv_i32 tmp = tcg_temp_new_i32();
 
 if (reg == 15) {
-tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
+/*
+ * This address is computed from an aligned PC:
+ * subtract off the low bits.
+ */
+gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
 } else {
 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
 }
@@ -1155,7 +1158,7 @@ void unallocated_encoding(DisasContext *s)
 /* Force a TB lookup after an instruction that changes the CPU state.  */
 void gen_lookup_tb(DisasContext *s)
 {
-tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
+gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
 s->base.is_jmp = DISAS_EXIT;
 }
 
@@ -6479,7 +6482,7 @@ static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
 return false;
 }
 tmp = load_reg(s, a->rm);
-tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
+gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
 gen_bx(s, tmp);
 return true;
 }
@@ -8347,7 +8350,7 @@ static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
 
 static bool trans_BL(DisasContext *s, arg_i *a)
 {
-tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
+gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
 gen_jmp(s, jmp_diff(s, a->imm));
 return true;
 }
@@ -8366,7 +8369,7 @@ static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
 if (s->thumb && (a->imm & 2)) {
 return false;
 }
-tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
+gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
 store_cpu_field_constant(!s->thumb, thumb);
 /* This jump is computed from an aligned PC: subtract off the low bits. */
 gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
@@ -8376,7 +8379,7 @@ static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
 {
 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
-tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
+gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
 return true;
 }
 
@@ -8386,7 +8389,7 @@ static bool trans_BL_suffix(DisasContext *s, 
arg_BL_suffix *a)
 
 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
-tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
+gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
 gen_bx(s, tmp);
 return true;
 }
@@ -8402,7 +8405,7 @@ static bool trans_BLX_suffix(DisasContext *s, 
arg_BLX_suffix *a)
 tmp = tcg_temp_new_i32();
 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
 tcg_gen_andi_i32(tmp, tmp, 0xfffc);
-tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
+gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
 gen_bx(s, tmp);
 return true;
 }
@@ -8725,10 +8728,11 @@ static bool op_tbranch(DisasContext *s, arg_tbranch *a, 
bool half)
 tcg_gen_add_i32(addr, addr, tmp);
 
 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
-tcg_temp_free_i32(addr);
 
 tcg_gen_add_i32(tmp, tmp, tmp);
-tcg_gen_addi_i32(tmp, tmp, read_pc(s));
+gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
+tcg_gen_add_i32(tmp, tmp, addr);
+tcg_temp_free_i32(addr);
 store_reg(s, 15, tmp);
 return true;
 }
-- 
2.34.1




[PATCH v8 6/9] target/arm: Change gen_jmp* to work on displacements

2022-10-19 Thread Richard Henderson
In preparation for TARGET_TB_PCREL, reduce reliance on absolute values.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/translate.c | 37 +
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/target/arm/translate.c b/target/arm/translate.c
index 9104ab8232..ca128edab7 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -266,6 +266,12 @@ static uint32_t read_pc(DisasContext *s)
 return s->pc_curr + (s->thumb ? 4 : 8);
 }
 
+/* The pc_curr difference for an architectural jump. */
+static target_long jmp_diff(DisasContext *s, target_long diff)
+{
+return diff + (s->thumb ? 4 : 8);
+}
+
 /* Set a variable to the value of a CPU register.  */
 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
 {
@@ -2592,7 +2598,7 @@ static void gen_goto_ptr(void)
  * cpu_loop_exec. Any live exit_requests will be processed as we
  * enter the next TB.
  */
-static void gen_goto_tb(DisasContext *s, int n, int diff)
+static void gen_goto_tb(DisasContext *s, int n, target_long diff)
 {
 target_ulong dest = s->pc_curr + diff;
 
@@ -2608,10 +2614,8 @@ static void gen_goto_tb(DisasContext *s, int n, int diff)
 }
 
 /* Jump, specifying which TB number to use if we gen_goto_tb() */
-static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno)
+static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
 {
-int diff = dest - s->pc_curr;
-
 if (unlikely(s->ss_active)) {
 /* An indirect jump so that we still trigger the debug exception.  */
 gen_update_pc(s, diff);
@@ -2653,9 +2657,9 @@ static inline void gen_jmp_tb(DisasContext *s, uint32_t 
dest, int tbno)
 }
 }
 
-static inline void gen_jmp(DisasContext *s, uint32_t dest)
+static inline void gen_jmp(DisasContext *s, target_long diff)
 {
-gen_jmp_tb(s, dest, 0);
+gen_jmp_tb(s, diff, 0);
 }
 
 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
@@ -8322,7 +8326,7 @@ static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
 
 static bool trans_B(DisasContext *s, arg_i *a)
 {
-gen_jmp(s, read_pc(s) + a->imm);
+gen_jmp(s, jmp_diff(s, a->imm));
 return true;
 }
 
@@ -8337,14 +8341,14 @@ static bool trans_B_cond_thumb(DisasContext *s, arg_ci 
*a)
 return true;
 }
 arm_skip_unless(s, a->cond);
-gen_jmp(s, read_pc(s) + a->imm);
+gen_jmp(s, jmp_diff(s, a->imm));
 return true;
 }
 
 static bool trans_BL(DisasContext *s, arg_i *a)
 {
 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
-gen_jmp(s, read_pc(s) + a->imm);
+gen_jmp(s, jmp_diff(s, a->imm));
 return true;
 }
 
@@ -8364,7 +8368,8 @@ static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
 }
 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
 store_cpu_field_constant(!s->thumb, thumb);
-gen_jmp(s, (read_pc(s) & ~3) + a->imm);
+/* This jump is computed from an aligned PC: subtract off the low bits. */
+gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
 return true;
 }
 
@@ -8525,10 +8530,10 @@ static bool trans_WLS(DisasContext *s, arg_WLS *a)
  * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
  */
 }
-gen_jmp_tb(s, s->base.pc_next, 1);
+gen_jmp_tb(s, curr_insn_len(s), 1);
 
 gen_set_label(nextlabel);
-gen_jmp(s, read_pc(s) + a->imm);
+gen_jmp(s, jmp_diff(s, a->imm));
 return true;
 }
 
@@ -8608,7 +8613,7 @@ static bool trans_LE(DisasContext *s, arg_LE *a)
 
 if (a->f) {
 /* Loop-forever: just jump back to the loop start */
-gen_jmp(s, read_pc(s) - a->imm);
+gen_jmp(s, jmp_diff(s, -a->imm));
 return true;
 }
 
@@ -8639,7 +8644,7 @@ static bool trans_LE(DisasContext *s, arg_LE *a)
 tcg_temp_free_i32(decr);
 }
 /* Jump back to the loop start */
-gen_jmp(s, read_pc(s) - a->imm);
+gen_jmp(s, jmp_diff(s, -a->imm));
 
 gen_set_label(loopend);
 if (a->tp) {
@@ -8647,7 +8652,7 @@ static bool trans_LE(DisasContext *s, arg_LE *a)
 store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
 }
 /* End TB, continuing to following insn */
-gen_jmp_tb(s, s->base.pc_next, 1);
+gen_jmp_tb(s, curr_insn_len(s), 1);
 return true;
 }
 
@@ -8746,7 +8751,7 @@ static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
 tmp, 0, s->condlabel);
 tcg_temp_free_i32(tmp);
-gen_jmp(s, read_pc(s) + a->imm);
+gen_jmp(s, jmp_diff(s, a->imm));
 return true;
 }
 
-- 
2.34.1




Re: [PATCH 4/4] target/i386: implement FMA instructions

2022-10-19 Thread Richard Henderson

On 10/20/22 01:06, Paolo Bonzini wrote:

The only issue with FMA instructions is that there are _a lot_ of them
(30 opcodes, each of which comes in up to 4 versions depending on VEX.W
and VEX.L).

We can reduce the number of helpers to one third by passing four operands
(one output and three inputs); the reordering of which operands go to
the multiply and which go to the add is done in emit.c.

Scalar versions do not do any merging; they only affect the bottom 32
or 64 bits of the output operand.  Therefore, there are no separate XMM
and YMM versions of the scalar helpers.

Signed-off-by: Paolo Bonzini 
---
  target/i386/cpu.c|  5 ++-
  target/i386/ops_sse.h| 63 
  target/i386/ops_sse_header.h | 28 ++
  target/i386/tcg/decode-new.c.inc | 38 +++
  target/i386/tcg/decode-new.h |  1 +
  target/i386/tcg/emit.c.inc   | 43 ++
  tests/tcg/i386/test-avx.py   |  2 +-
  7 files changed, 177 insertions(+), 3 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 6292b7e12f..22b681ca37 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -625,10 +625,11 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */   \
CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR | \
-  CPUID_EXT_RDRAND | CPUID_EXT_AVX | CPUID_EXT_F16C)
+  CPUID_EXT_RDRAND | CPUID_EXT_AVX | CPUID_EXT_F16C | \
+  CPUID_EXT_FMA)
/* missing:
CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
-  CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
+  CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID,
CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER */
  
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h

index 33c61896ee..041a048a70 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -2522,6 +2522,69 @@ void helper_vpermd_ymm(Reg *d, Reg *v, Reg *s)
  }
  #endif
  
+/* FMA3 op helpers */

+#if SHIFT == 1
+#define SSE_HELPER_FMAS(name, elem, F) 
\
+void name(CPUX86State *env, Reg *d, Reg *a, Reg *b, Reg *c)
\
+{  
\
+d->elem(0) = F(a->elem(0), b->elem(0), c->elem(0));
\
+}
+#define SSE_HELPER_FMAP(name, elem, num, F)
\
+void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *a, Reg *b, Reg *c)  
\
+{  
\
+int i; 
\
+for (i = 0; i < num; i++) {
\
+d->elem(i) = F(a->elem(i), b->elem(i), c->elem(i));
\
+}  
\
+}
+
+#define FMADD32(a, b, c) float32_muladd(a, b, c, 0, &env->sse_status)
+#define FMADD64(a, b, c) float64_muladd(a, b, c, 0, &env->sse_status)
+
+#define FMNADD32(a, b, c) float32_muladd(a, b, c, float_muladd_negate_product, &env->sse_status)
+#define FMNADD64(a, b, c) float64_muladd(a, b, c, float_muladd_negate_product, &env->sse_status)
+
+#define FMSUB32(a, b, c) float32_muladd(a, b, c, float_muladd_negate_c, &env->sse_status)
+#define FMSUB64(a, b, c) float64_muladd(a, b, c, float_muladd_negate_c, &env->sse_status)
+
+#define FMNSUB32(a, b, c) float32_muladd(a, b, c, float_muladd_negate_c|float_muladd_negate_product, &env->sse_status)
+#define FMNSUB64(a, b, c) float64_muladd(a, b, c, float_muladd_negate_c|float_muladd_negate_product, &env->sse_status)
+
+#define FMADDSUB32(a, b, c) float32_muladd(a, b, c, (i & 1) ? 0 : float_muladd_negate_c, &env->sse_status)
+#define FMADDSUB64(a, b, c) float64_muladd(a, b, c, (i & 1) ? 0 : float_muladd_negate_c, &env->sse_status)
+
+#define FMSUBADD32(a, b, c) float32_muladd(a, b, c, (i & 1) ? float_muladd_negate_c : 0, &env->sse_status)
+#define FMSUBADD64(a, b, c) float64_muladd(a, b, c, (i & 1) ? float_muladd_negate_c : 0, &env->sse_status)
+
+SSE_HELPER_FMAS(helper_fmaddss,  ZMM_S, FMADD32)
+SSE_HELPER_FMAS(helper_fmaddsd,  ZMM_D, FMADD64)
+SSE_HELPER_FMAS(helper_fmnaddss, ZMM_S, FMNADD32)
+SSE_HELPER_FMAS(helper_fmnaddsd, ZMM_D, FMNADD64)
+SSE_HELPER_FMAS(helper_fmsubss,  ZMM_S, FMSUB32)
+SSE_HELPER_FMAS(helper_fmsubsd,  ZMM_D, FMSUB64)
+SSE_HELPER_FMAS(helper_fmnsubss, ZMM_S, FMNSUB32)
+SSE_HELPER_FMAS(helper_fmnsubsd, ZMM_D, FMNSUB64)


Would it be worth passing the muladd constant(s) as a parameter to a reduced number of 
helper functions?


E.g.

void fmas_name(..., int flags)
{
   
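
Fleshed out, a flag-parameterised scalar helper might look roughly like this
(a sketch of the idea only; the helper names and the way the flags operand is
plumbed through are assumptions, not part of the patch):

/* One scalar helper per element width; the float_muladd_negate_* flags
 * arrive as an operand instead of being baked into 30 separate helpers.
 */
void helper_fmas32(CPUX86State *env, Reg *d, Reg *a, Reg *b, Reg *c,
                   uint32_t flags)
{
    d->ZMM_S(0) = float32_muladd(a->ZMM_S(0), b->ZMM_S(0), c->ZMM_S(0),
                                 flags, &env->sse_status);
}

void helper_fmas64(CPUX86State *env, Reg *d, Reg *a, Reg *b, Reg *c,
                   uint32_t flags)
{
    d->ZMM_D(0) = float64_muladd(a->ZMM_D(0), b->ZMM_D(0), c->ZMM_D(0),
                                 flags, &env->sse_status);
}

The translator would then pass 0, float_muladd_negate_product,
float_muladd_negate_c, or their combination, collapsing the eight
fmadd/fmnadd/fmsub/fmnsub scalar helpers per width into one.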

Re: [PATCH 3/4] target/i386: implement F16C instructions

2022-10-19 Thread Richard Henderson

On 10/20/22 01:06, Paolo Bonzini wrote:

F16C only consists of two instructions, which are a bit peculiar
nevertheless.

First, they access only the low half of an YMM or XMM register for the
packed-half operand; the exact size still depends on the VEX.L flag.
This is similar to the existing avx_movx flag, but not exactly because
avx_movx is hardcoded to affect operand 2.  To this end I added a "ph"
format name; it's possible to reuse this approach for the VPMOVSX and
VPMOVZX instructions, though that would also require adding two more
formats for the low-quarter and low-eighth of an operand.

Second, VCVTPS2PH is somewhat weird because it *stores* the result of
the instruction into memory rather than loading it.

Signed-off-by: Paolo Bonzini
---
  target/i386/cpu.c|  5 ++---
  target/i386/cpu.h|  3 +++
  target/i386/ops_sse.h| 29 +
  target/i386/ops_sse_header.h |  6 ++
  target/i386/tcg/decode-new.c.inc |  8 
  target/i386/tcg/decode-new.h |  2 ++
  target/i386/tcg/emit.c.inc   | 17 -
  tests/tcg/i386/test-avx.c| 17 +
  tests/tcg/i386/test-avx.py   |  8 ++--
  9 files changed, 89 insertions(+), 6 deletions(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH 1/4] target/i386: decode-new: avoid out-of-bounds access to xmm_regs[-1]

2022-10-19 Thread Richard Henderson

On 10/20/22 01:06, Paolo Bonzini wrote:

If the destination is a memory register, op->n is -1.  Going through
tcg_gen_gvec_dup_imm path is both useless (the value has been stored
by the gen_* function already) and wrong because of the out-of-bounds
access.

Signed-off-by: Paolo Bonzini 
---
  target/i386/tcg/emit.c.inc | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)


Reviewed-by: Richard Henderson 

r~




Re: [PATCH 2/4] target/i386: introduce function to set rounding mode from FPCW or MXCSR bits

2022-10-19 Thread Richard Henderson

On 10/20/22 01:06, Paolo Bonzini wrote:

VROUND, FSTCW and STMXCSR all have to perform the same conversion from
x86 rounding modes to softfloat constants.  Since the ISA is consistent
on the meaning of the two-bit rounding modes, extract the common code
into a wrapper for set_float_rounding_mode.

Signed-off-by: Paolo Bonzini
---
  target/i386/ops_sse.h| 60 +++-
  target/i386/tcg/fpu_helper.c | 60 +---
  2 files changed, 25 insertions(+), 95 deletions(-)
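
The conversion itself boils down to a four-entry lookup over the two-bit field;
a minimal sketch (the function name and table layout here are assumptions, not
necessarily what the patch ends up with):

static void set_x86_rounding_mode(unsigned mode, float_status *status)
{
    static const FloatRoundMode rounding_modes[4] = {
        float_round_nearest_even, /* 00b: round to nearest even */
        float_round_down,         /* 01b: round toward -inf */
        float_round_up,           /* 10b: round toward +inf */
        float_round_to_zero,      /* 11b: truncate */
    };
    assert(mode < 4);
    set_float_rounding_mode(rounding_modes[mode], status);
}

Callers would extract the two RC bits (bits 13-14 of MXCSR, bits 10-11 of the
x87 control word) and pass them in.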


Reviewed-by: Richard Henderson 


r~



Re: [PATCH v2 2/2] util/log: Always send errors to logfile when daemonized

2022-10-19 Thread Richard Henderson

On 10/20/22 01:16, Greg Kurz wrote:

When QEMU is started with `-daemonize`, all stdio descriptors get
redirected to `/dev/null`. This basically means that anything
printed with error_report() and friends is lost.

One could hope that passing `-D ${logfile}` would cause the messages
to go to `${logfile}`, as the documentation tends to suggest:

   -D logfile
   Output log in logfile instead of to stderr

Unfortunately, `-D` belongs to the logging framework and it only
does this redirection if some log item is also enabled with `-d`
or if QEMU was configured with `--enable-trace-backend=log`. A
typical production setup doesn't do tracing or fine-grain
debugging but it certainly needs to collect errors.

Ignore the check on enabled log items when QEMU is daemonized. Previous
behaviour is retained for the non-daemonized case. The logic is unrolled
as an `if` for better readability. Since qemu_set_log_internal() caches
the final log level and the per-thread property in global variables, it
seems more correct to check these instead of intermediary local variables.

Special care is needed for the `-D ${logfile} -d tid` case : `${logfile}`
is expected to be a template that contains exactly one `%d` that should be
expanded to a PID or TID. The logic in qemu_log_trylock() already takes
care of that for per-thread logs. Do it as well for the QEMU main thread
when opening the file.


I don't understand why daemonize changes -d tid at all.
If there's a bug there, please separate it out.

I don't understand the is_main_log_thread checks.
Why is the main thread special?


-/*
- * In all cases we only log if qemu_loglevel is set.
- * Also:
- *   If per-thread, open the file for each thread in qemu_log_lock.
- *   If not daemonized we will always log either to stderr
- * or to a file (if there is a filename).
- *   If we are daemonized, we will only log if there is a filename.
- */
  daemonized = is_daemonized();
-need_to_open_file = log_flags && !per_thread && (!daemonized || filename);
+need_to_open_file = false;
+if (!daemonized) {
+/*
+ * If not daemonized we only log if qemu_loglevel is set, either to
+ * stderr or to a file (if there is a filename).
+ * If per-thread, open the file for each thread in qemu_log_trylock().
+ */
+need_to_open_file = qemu_loglevel && !log_per_thread;
+} else {
+/*
+ * If we are daemonized, we will only log if there is a filename.
+ */
+need_to_open_file = filename != NULL;
+}


I would have thought that this was the only change required -- ignoring qemu_loglevel when 
daemonized.
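
i.e. something along the lines of (an untested sketch reusing the existing variables):

/* When daemonized, stderr goes to /dev/null, so a filename alone is enough;
 * otherwise keep requiring an enabled log item as before.
 */
need_to_open_file = !per_thread &&
                    (daemonized ? filename != NULL : log_flags != 0);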



r~



Re: [PATCH] MAINTAINERS: target/s390x/: add Ilya as reviewer

2022-10-19 Thread Richard Henderson

On 10/19/22 22:56, Christian Borntraeger wrote:

Ilya has volunteered to review TCG patches for s390x.

Signed-off-by: Christian Borntraeger 
---
  MAINTAINERS | 1 +
  1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e3d5b7e09c46..ae5e8c8ecbb6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -305,6 +305,7 @@ F: target/rx/
  S390 TCG CPUs
  M: Richard Henderson 
  M: David Hildenbrand 
+R: Ilya Leoshkevich 
  S: Maintained
  F: target/s390x/
  F: target/s390x/tcg


Acked-by: Richard Henderson 

r~



Re: [PATCH v2] vhost-vdpa: add support for vIOMMU

2022-10-19 Thread Jason Wang
On Wed, Oct 19, 2022 at 2:39 PM Cindy Lu  wrote:
>
> Add support for vIOMMU. Register a memory listener to dma_as in
> dev_start
> - during region_add register a specific IOMMU notifier, and store all 
> notifiers in a list.
> - during region_del, compare and delete the IOMMU notifier from the list
>
> Verified in vp_vdpa and vdpa_sim_net driver
>
> Signed-off-by: Cindy Lu 
> ---
>  hw/virtio/vhost-vdpa.c | 218 -
>  include/hw/virtio/vhost-vdpa.h |  11 ++
>  2 files changed, 226 insertions(+), 3 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 3ff9ce3501..8f36423c28 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -26,6 +26,7 @@
>  #include "cpu.h"
>  #include "trace.h"
>  #include "qapi/error.h"
> +#include "hw/virtio/virtio-access.h"
>
>  /*
>   * Return one past the end of the end of section. Be careful with uint64_t
> @@ -146,9 +147,8 @@ static void vhost_vdpa_iotlb_batch_begin_once(struct 
> vhost_vdpa *v)
>  v->iotlb_batch_begin_sent = true;
>  }
>
> -static void vhost_vdpa_listener_commit(MemoryListener *listener)
> +static void vhost_vdpa_iotlb_batch_end(struct vhost_vdpa *v)
>  {
> -struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, 
> listener);
>  struct vhost_dev *dev = v->dev;
>  struct vhost_msg_v2 msg = {};
>  int fd = v->device_fd;
> @@ -172,6 +172,18 @@ static void vhost_vdpa_listener_commit(MemoryListener 
> *listener)
>
>  v->iotlb_batch_begin_sent = false;
>  }
> +static void vhost_vdpa_listener_commit(MemoryListener *listener)
> +{
> +struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, 
> listener);
> +return vhost_vdpa_iotlb_batch_end(v);
> +}
> +
> +static void vhost_vdpa_listener_iommu_commit(MemoryListener *listener)
> +{
> +struct vhost_vdpa *v =
> +container_of(listener, struct vhost_vdpa, iommu_listener);
> +return vhost_vdpa_iotlb_batch_end(v);
> +}
>
>  static void vhost_vdpa_listener_region_add(MemoryListener *listener,
> MemoryRegionSection *section)
> @@ -186,6 +198,9 @@ static void vhost_vdpa_listener_region_add(MemoryListener 
> *listener,
>  v->iova_range.last)) {
>  return;
>  }
> +if (memory_region_is_iommu(section->mr)) {
> +return;
> +}
>
>  if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) 
> !=
>   (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> @@ -260,6 +275,9 @@ static void vhost_vdpa_listener_region_del(MemoryListener 
> *listener,
>  v->iova_range.last)) {
>  return;
>  }
> +if (memory_region_is_iommu(section->mr)) {
> +return;
> +}
>
>  if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) 
> !=
>   (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> @@ -312,6 +330,180 @@ static const MemoryListener vhost_vdpa_memory_listener 
> = {
>  .region_del = vhost_vdpa_listener_region_del,
>  };
>
> +/* Called with rcu_read_lock held.  */
> +static bool vhost_vdpa_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
> + ram_addr_t *ram_addr, bool *read_only)

So this looks exactly the same as what VFIO did; try to unify them?

> +{
> +MemoryRegion *mr;
> +hwaddr xlat;
> +hwaddr len = iotlb->addr_mask + 1;
> +bool writable = iotlb->perm & IOMMU_WO;
> +
> +/*
> + * The IOMMU TLB entry we have just covers translation through
> + * this IOMMU to its immediate target.  We need to translate
> + * it the rest of the way through to memory.
> + */
> +mr = address_space_translate(&address_space_memory, iotlb->translated_addr,
> + &xlat, &len, writable, MEMTXATTRS_UNSPECIFIED);
> +if (!memory_region_is_ram(mr)) {
> +error_report("iommu map to non memory area %" HWADDR_PRIx "", xlat);
> +return false;
> +} else if (memory_region_has_ram_discard_manager(mr)) {
> +RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
> +MemoryRegionSection tmp = {
> +.mr = mr,
> +.offset_within_region = xlat,
> +.size = int128_make64(len),
> +};
> +
> +if (!ram_discard_manager_is_populated(rdm, &tmp)) {
> +error_report("iommu map to discarded memory (e.g., unplugged via"
> + " virtio-mem): %" HWADDR_PRIx "",
> + iotlb->translated_addr);
> +return false;
> +}
> +}
> +/*
> + * Translation truncates length to the IOMMU page size,
> + * check that it did not truncate too much.
> + */
> +if (len & iotlb->addr_mask) {
> +error_report("iommu has granularity incompatible with target AS");
> +return false;
> +}
> +
> +if (vaddr) {
> + 

Re: [PATCH v4 5/5] test/acpi/bios-tables-test: SSDT: update golden master binaries

2022-10-19 Thread Robert Hoo
Ping...
On Fri, 2022-10-07 at 21:27 +0800, Robert Hoo wrote:
> Ping...
> On Tue, 2022-09-27 at 08:30 +0800, Robert Hoo wrote:
> > On Mon, 2022-09-26 at 15:22 +0200, Igor Mammedov wrote:
> > > > > 0800200c9a66"), One, 0x05, Local0, One)
> > > > > +CreateDWordField (Local3, Zero, STTS)
> > > > > +CreateField (Local3, 0x20, (LEN <<
> > > > > 0x03),
> > > > > LDAT)
> > > > > +Name (LSA, Buffer (Zero){})
> > > > > +ToBuffer (LDAT, LSA) /*
> > > > > \_SB_.NVDR.NV00._LSR.LSA_ */
> > > > > +Local1 = Package (0x02)
> > > > > +{
> > > > > +STTS,
> > > > > +LSA
> > > > > +}  
> > > > 
> > > > Hi Igor,
> > > > 
> > > > Here is a little different from original proposal 
> > > > https://lore.kernel.org/qemu-devel/80b09055416c790922c7c3db60d2ba865792d1b0.ca...@linux.intel.com/
> > > > 
> > > >Local1 = Package (0x2) {STTS, toBuffer(LDAT)}
> > > > 
> > > > Because in my test, Linux guest complains:
> > > > 
> > > > [3.884656] ACPI Error: AE_SUPPORT, Expressions within
> > > > package
> > > > elements are not supported (20220331/dspkginit-172)
> > > > [3.887104] ACPI Error: Aborting method \_SB.NVDR.NV00._LSR
> > > > due
> > > > to
> > > > previous error (AE_SUPPORT) (20220331/psparse-531)
> > > > 
> > > > 
> > > > So I have to move toBuffer() out of Package{} and name LSA to hold
> > > > the buffer. If you have a better idea, please let me know.
> > > 
> > > Would something like following work?
> > > 
> > > LocalX =  Buffer (Zero){}
> > > LocalY = Package (0x01) { LocalX }
> > 
> > 
> > No, Package{} doesn't accept LocalX as elements.
> > 
> > PackageTerm :=
> > Package (
> > NumElements // Nothing | ByteConstExpr | TermArg => Integer
> > ) {PackageList} => Package
> > 
> > PackageList :=
> > Nothing | 
> > 
> > PackageElement :=
> > DataObject | NameString




Re: [PATCH v3 12/12] target/ppc: Use gvec to decode XVTSTDC[DS]P

2022-10-19 Thread Richard Henderson

On 10/19/22 22:50, Lucas Mateus Castro(alqotel) wrote:

From: "Lucas Mateus Castro (alqotel)"

Used gvec to translate XVTSTDCSP and XVTSTDCDP.

xvtstdcsp:
rept  loop  imm  master version  prev version       current version
25    4000  0    0,206200        0,040730 (-80.2%)  0,040740 (-80.2%)
25    4000  1    0,205120        0,053650 (-73.8%)  0,053510 (-73.9%)
25    4000  3    0,206160        0,058630 (-71.6%)  0,058570 (-71.6%)
25    4000  51   0,217110        0,191490 (-11.8%)  0,192320 (-11.4%)
25    4000  127  0,206160        0,191490 (-7.1%)   0,192640 (-6.6%)
8000  12    0    1,234719        0,418833 (-66.1%)  0,386365 (-68.7%)
8000  12    1    1,232417        1,435979 (+16.5%)  1,462792 (+18.7%)
8000  12    3    1,232760        1,766073 (+43.3%)  1,743990 (+41.5%)
8000  12    51   1,239281        1,319562 (+6.5%)   1,423479 (+14.9%)
8000  12    127  1,231708        1,315760 (+6.8%)   1,426667 (+15.8%)

xvtstdcdp:
rept  loop  imm  master version  prev version       current version
25    4000  0    0,159930        0,040830 (-74.5%)  0,040610 (-74.6%)
25    4000  1    0,160640        0,053670 (-66.6%)  0,053480 (-66.7%)
25    4000  3    0,160020        0,063030 (-60.6%)  0,062960 (-60.7%)
25    4000  51   0,160410        0,128620 (-19.8%)  0,127470 (-20.5%)
25    4000  127  0,160330        0,127670 (-20.4%)  0,128690 (-19.7%)
8000  12    0    1,190365        0,422146 (-64.5%)  0,388417 (-67.4%)
8000  12    1    1,191292        1,445312 (+21.3%)  1,428698 (+19.9%)
8000  12    3    1,188687        1,980656 (+66.6%)  1,975354 (+66.2%)
8000  12    51   1,191250        1,264500 (+6.1%)   1,355083 (+13.8%)
8000  12    127  1,197313        1,266729 (+5.8%)   1,349156 (+12.7%)

Overall, these instructions are the hardest ones to measure performance
for, as the gvec implementation is affected by the immediate. Above there are
5 different scenarios when it comes to immediate and 2 when it comes to
rept/loop combination. The immediates scenarios are: all bits are 0
therefore the target register should just be changed to 0, with 1 bit
set, with 2 bits set in a combination the new implementation can deal
with using gvec, 4 bits set and the new implementation can't deal with
it using gvec and all bits set. The rept/loop scenarios are high loop
and low rept (so it should spend more time executing it than translating
it) and high rept low loop (so it should spend more time translating it
than executing this code).
These comparisons are between the upstream version, a previous similar
implementation and a one with a cleaner code(this one).
For a comparison with a previous, different implementation:
<20221010191356.83659-13-lucas.ara...@eldorado.org.br>

Signed-off-by: Lucas Mateus Castro (alqotel)
---
  target/ppc/translate/vsx-impl.c.inc | 164 ++--
  1 file changed, 154 insertions(+), 10 deletions(-)


Reviewed-by: Richard Henderson 


r~



[PATCH] dump/win_dump: limit number of processed PRCBs

2022-10-19 Thread Viktor Prutyanov
When the number of CPUs utilized by guest Windows is less than the number
defined in QEMU (i.e., desktop versions of Windows severely limit the number
of CPU sockets), the patch_and_save_context routine accesses a non-existent
PRCB and fails. So, limit the number of processed PRCBs to NumberProcessors
taken from the guest Windows driver.

Signed-off-by: Viktor Prutyanov 
---
 dump/win_dump.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/dump/win_dump.c b/dump/win_dump.c
index fd91350fbb..f20b6051b6 100644
--- a/dump/win_dump.c
+++ b/dump/win_dump.c
@@ -273,6 +273,13 @@ static void patch_and_save_context(WinDumpHeader *h, bool 
x64,
 uint64_t Context;
 WinContext ctx;
 
+if (i >= WIN_DUMP_FIELD(NumberProcessors)) {
+warn_report("win-dump: number of QEMU CPUs is bigger than"
+" NumberProcessors (%u) in guest Windows",
+WIN_DUMP_FIELD(NumberProcessors));
+return;
+}
+
 if (cpu_read_ptr(x64, first_cpu,
 KiProcessorBlock + i * win_dump_ptr_size(x64),
 &Context)) {
-- 
2.35.1




Re: [RFC PATCH] target/s390x: fake instruction loading when handling 'ex'

2022-10-19 Thread Richard Henderson

On 10/19/22 21:35, Alex Bennée wrote:

The s390x EXecute instruction is a bit weird as we synthesise the
executed instruction from what we have stored in memory. When plugins
are enabled this breaks because we detect the ld_code2() loading from
a non-zero offset without the rest of the instruction being there.


Hmm.  The fact that you see an ld_code2 at all is incorrect.

(1) translator_lduw, which ld_code2 is using, is supposed to
be only for instructions that we're executing, per plugins.
But the usage you are seeing is a probe for the next insn,
which should not be included.

(2) We always exit the tb after EX, so the probe for the next
insn is also wrong.  We've got the tests in the wrong order:

if (!is_same_page(dcbase, dc->base.pc_next) ||
!is_same_page(dcbase, get_next_pc(env, dc, dc->base.pc_next)) ||
dc->ex_value) {
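
Presumably that just means testing dc->ex_value before probing the next insn,
e.g. (a sketch, with the surrounding code unchanged):

    if (dc->ex_value ||
        !is_same_page(dcbase, dc->base.pc_next) ||
        !is_same_page(dcbase, get_next_pc(env, dc, dc->base.pc_next))) {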

None of that takes away from the fact that we *should* have a way to report the EX 
instruction to the plugin.




+/**
+ * translator_fake_ldw - fake instruction load
+ * @insn16: 2 byte instruction
+ * @pc: program counter of instruction
+ *
+ * This is a special case helper used where the instruction we are
+ * about to translate comes from somewhere else (e.g. being
+ * re-synthesised for s390x "ex"). It ensures we update other areas of
+ * the translator with details of the executed instruction.
+ */
+
+static inline void translator_fake_ldw(uint16_t insn16, abi_ptr pc)
+{
+plugin_insn_append(pc, &insn16, sizeof(insn16));
+}


You're not handling the endianness of the two bytes.
I think you should just decompose all the way to fake_ldb.
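
A byte-wise decomposition might look roughly like this (a sketch; the fake_ldb
name and the explicit big-endian split are assumptions):

static inline void translator_fake_ldb(uint8_t insn8, abi_ptr pc)
{
    plugin_insn_append(pc, &insn8, sizeof(insn8));
}

static inline void translator_fake_ldw(uint16_t insn16, abi_ptr pc)
{
    /* s390x instructions are big-endian: append the high byte first. */
    translator_fake_ldb(insn16 >> 8, pc);
    translator_fake_ldb(insn16, pc + 1);
}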

I don't know how much sense you're going to get out of the PC.  The EX instruction is at a 
particular address A, having loaded data from B.  The EX instruction is 4 bytes, but the 
insn at B may be 6 bytes.  The next insn executed may well be PC = A+4, apparently 
overlapping with the 6 byte insn you gave to the plugin just a minute ago.


But I don't know what else to report except [PC, PC+5], as you're doing.



r~



Re: [PATCH v5 6/6] hw/arm/virt: Add 'compact-highmem' property

2022-10-19 Thread Gavin Shan

Hi Eric,

On 10/20/22 4:18 AM, Eric Auger wrote:

On 10/12/22 01:18, Gavin Shan wrote:

After the improvement to high memory region address assignment is
applied, the memory layout can be changed, introducing possible
migration breakage. For example, VIRT_HIGH_PCIE_MMIO memory region
is disabled or enabled when the optimization is applied or not, with
the following configuration.

   pa_bits  = 40;
   vms->highmem_redists = false;
   vms->highmem_ecam= false;
   vms->highmem_mmio= true;

   # qemu-system-aarch64 -accel kvm -cpu host\
 -machine virt-7.2,compact-highmem={on, off} \
 -m 4G,maxmem=511G -monitor stdio

   Regioncompact-highmem=off compact-highmem=on
   
   RAM   [1GB 512GB][1GB 512GB]
   HIGH_GIC_REDISTS  [512GB   512GB+64MB]   [disabled]
   HIGH_PCIE_ECAM[512GB+256MB 512GB+512MB]  [disabled]
   HIGH_PCIE_MMIO[disabled] [512GB   1TB]

In order to keep backwards compatibility, we need to disable the
optimization on machines that are virt-7.1 or earlier. It
means the optimization is enabled by default from virt-7.2. Besides,
'compact-highmem' property is added so that the optimization can be
explicitly enabled or disabled on all machine types by users.

Signed-off-by: Gavin Shan 
Tested-by: Zhenyu Zhang 
---
  docs/system/arm/virt.rst |  4 
  hw/arm/virt.c| 47 
  include/hw/arm/virt.h|  1 +
  3 files changed, 52 insertions(+)

diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index 20442ea2c1..75bf5a4994 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -94,6 +94,10 @@ highmem
address space above 32 bits. The default is ``on`` for machine types
later than ``virt-2.12``.
  
+compact-highmem

+  Set ``on``/``off`` to enable/disable compact space for high memory regions.
+  The default is ``on`` for machine types later than ``virt-7.2``
+
  gic-version
Specify the version of the Generic Interrupt Controller (GIC) to provide.
Valid values are:
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index c05cfb5314..8f1dba0ece 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -174,6 +174,27 @@ static const MemMapEntry base_memmap[] = {
   * Note the extended_memmap is sized so that it eventually also includes the
   * base_memmap entries (VIRT_HIGH_GIC_REDIST2 index is greater than the last
   * index of base_memmap).
+ *
+ * The addresses assigned to these regions are affected by 'compact-highmem'
+ * property, which is to enable or disable the compact space in the Highmem
+ * IO regions. For example, VIRT_HIGH_PCIE_MMIO can be disabled or enabled
+ * depending on the property in the following scenario.

To me you should rather explain here what the so-called "compact"
space is vs the legacy highmem layout.

If I understand correctly the example rather legitimates the use of a
compat option, showing how the layout can be affected by the option. I
would put that in the commit msg instead. Also, in your example I see
VIRT_HIGH_GIC_REDISTS is disabled, but the code does not disable the
region except if it does not fit within the PA. This does not match your
example. Also the region is named VIRT_HIGH_GIC_REDIST2.

In v4, Marc also suggested to have individual options for each highmem
region.
https://lore.kernel.org/qemu-devel/0f8e6a58-0dde-fb80-6966-7bb32c4df...@redhat.com/

Have you considered that option?



I think your comments make sense to me. So lets put the following comments
to the code and move the example to commit log.

  /*
   * The memory map for these Highmem IO Regions can be in legacy or compact
   * layout, depending on 'compact-highmem' property. In legacy layout, the
   * PA space for one specific region is always reserved, even if the region
   * has been disabled or doesn't fit into the PA space. In the compact layout,
   * however, the PA space for the region won't be reserved in these
   * circumstances.
   */

You're correct about the example. VIRT_HIGH_GIC_REDIST2 should be used. Besides,
the configuration is only achievable by modifying source code at present, until
Marc's suggestion rolls in to allow users to disable one particular high memory
region via more properties. I will amend the commit log to have something like
below.

For example, VIRT_HIGH_PCIE_MMIO memory region is disabled or enabled when
the optimization is applied or not, with the following configuration. The
configuration is only achievable by modifying source code, until more 
properties
are added to allow user selectively disable those high memory regions.

Regarding Marc's suggestion to add properties so that these high memory regions
can be disabled by users: I can add one patch after this one to introduce the
following 3 properties. Could you please confirm the property names are good
enough? It would be nice if Marc can help to confirm before I'm 

Re: [PATCH v1 06/12] xen-hvm: move common functions to hw/xen/xen-hvm-common.c

2022-10-19 Thread Garhwal, Vikram
Thanks Paul & Julien for reviewing. I will update this in v2.

Regards,
Vikram

From: Paul Durrant 
Date: Wednesday, October 19, 2022 at 9:16 AM
To: Garhwal, Vikram , qemu-devel@nongnu.org 

Cc: Stabellini, Stefano , Stefano Stabellini 
, Anthony Perard , Michael 
S. Tsirkin , Marcel Apfelbaum , 
Paolo Bonzini , Richard Henderson 
, Eduardo Habkost , open 
list:X86 Xen CPUs 
Subject: Re: [PATCH v1 06/12] xen-hvm: move common functions to 
hw/xen/xen-hvm-common.c
On 15/10/2022 06:07, Vikram Garhwal wrote:
[snip]
> +qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);
> +
> +state->memory_listener = xen_memory_listener;
> +memory_listener_register(&state->memory_listener, &address_space_memory);
> +
> +state->io_listener = xen_io_listener;
> +memory_listener_register(&state->io_listener, &address_space_io);
> +
> +state->device_listener = xen_device_listener;
> +QLIST_INIT(&state->dev_list);
> +device_listener_register(&state->device_listener);
> +

As Julien said, these do not belong here. These are the (current and
legacy) PV backend setup functions; they most certainly have nothing to
do with device emulation.

   Paul


Re: [PATCH v5 6/6] hw/arm/virt: Add 'compact-highmem' property

2022-10-19 Thread Gavin Shan

Hi Connie,

On 10/19/22 10:00 PM, Cornelia Huck wrote:

On Wed, Oct 12 2022, Gavin Shan  wrote:


After the improvement to high memory region address assignment is
applied, the memory layout can be changed, introducing possible
migration breakage. For example, VIRT_HIGH_PCIE_MMIO memory region
is disabled or enabled when the optimization is applied or not, with
the following configuration.

   pa_bits  = 40;
   vms->highmem_redists = false;
   vms->highmem_ecam= false;
   vms->highmem_mmio= true;

   # qemu-system-aarch64 -accel kvm -cpu host\
 -machine virt-7.2,compact-highmem={on, off} \
 -m 4G,maxmem=511G -monitor stdio

   Regioncompact-highmem=off compact-highmem=on
   
   RAM   [1GB 512GB][1GB 512GB]
   HIGH_GIC_REDISTS  [512GB   512GB+64MB]   [disabled]
   HIGH_PCIE_ECAM[512GB+256MB 512GB+512MB]  [disabled]
   HIGH_PCIE_MMIO[disabled] [512GB   1TB]

In order to keep backwards compatibility, we need to disable the
optimization on machines that are virt-7.1 or earlier. It
means the optimization is enabled by default from virt-7.2. Besides,
'compact-highmem' property is added so that the optimization can be
explicitly enabled or disabled on all machine types by users.

Signed-off-by: Gavin Shan 
Tested-by: Zhenyu Zhang 
---
  docs/system/arm/virt.rst |  4 
  hw/arm/virt.c| 47 
  include/hw/arm/virt.h|  1 +
  3 files changed, 52 insertions(+)

diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index 20442ea2c1..75bf5a4994 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -94,6 +94,10 @@ highmem
address space above 32 bits. The default is ``on`` for machine types
later than ``virt-2.12``.
  
+compact-highmem

+  Set ``on``/``off`` to enable/disable compact space for high memory regions.


Maybe s/compact space/the compact layout/ ?



Yeah, 'compact layout' is better. I will amend in next respin.


+  The default is ``on`` for machine types later than ``virt-7.2``
+
  gic-version
Specify the version of the Generic Interrupt Controller (GIC) to provide.
Valid values are:
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index c05cfb5314..8f1dba0ece 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -174,6 +174,27 @@ static const MemMapEntry base_memmap[] = {
   * Note the extended_memmap is sized so that it eventually also includes the
   * base_memmap entries (VIRT_HIGH_GIC_REDIST2 index is greater than the last
   * index of base_memmap).
+ *
+ * The addresses assigned to these regions are affected by 'compact-highmem'
+ * property, which is to enable or disable the compact space in the Highmem


s/space in/layout for/ ?



Agreed.


+ * IO regions. For example, VIRT_HIGH_PCIE_MMIO can be disabled or enabled
+ * depending on the property in the following scenario.
+ *
+ * pa_bits  = 40;
+ * vms->highmem_redists = false;
+ * vms->highmem_ecam= false;
+ * vms->highmem_mmio= true;
+ *
+ * # qemu-system-aarch64 -accel kvm -cpu host\
+ *   -machine virt-7.2,compact-highmem={on, off} \
+ *   -m 4G,maxmem=511G -monitor stdio
+ *
+ * Regioncompact-highmem=offcompact-highmem=on
+ * 
+ * RAM   [1GB 512GB][1GB 512GB]
+ * HIGH_GIC_REDISTS  [512GB   512GB+64MB]   [disabled]
+ * HIGH_PCIE_ECAM[512GB+256GB 512GB+512MB]  [disabled]
+ * HIGH_PCIE_MMIO[disabled] [512GB   1TB]
   */
  static MemMapEntry extended_memmap[] = {
  /* Additional 64 MB redist region (can contain up to 512 redistributors) 
*/


(...)


@@ -3124,8 +3167,12 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 2)
  
  static void virt_machine_7_1_options(MachineClass *mc)

  {
+VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+
  virt_machine_7_2_options(mc);
  compat_props_add(mc->compat_props, hw_compat_7_1, hw_compat_7_1_len);
+/* Compact space for high memory regions was introduced with 7.2 */


s/space/layout/ ?



Ack.


+vmc->no_highmem_compact = true;
  }
  DEFINE_VIRT_MACHINE(7, 1)
  


Other than the wording nits, lgtm.

Reviewed-by: Cornelia Huck 



Thanks,
Gavin




Re: [PATCH v5 5/6] hw/arm/virt: Improve high memory region address assignment

2022-10-19 Thread Gavin Shan

Hi Eric,

On 10/20/22 4:07 AM, Eric Auger wrote:

On 10/12/22 01:18, Gavin Shan wrote:

There are three high memory regions, which are VIRT_HIGH_REDIST2,
VIRT_HIGH_PCIE_ECAM and VIRT_HIGH_PCIE_MMIO. Their base addresses
are floating on highest RAM address. However, they can be disabled
in several cases.

(1) One specific high memory region is disabled by developer by
 toggling vms->highmem_{redists, ecam, mmio}.

I would replace the above sentence by

One specific high memory region is likely to be disabled by the code by toggling 
vms->highmem_{redists, ecam, mmio}:



Ok.



(2) VIRT_HIGH_PCIE_ECAM region is disabled on machine, which is
 'virt-2.12' or ealier than it.

(3) VIRT_HIGH_PCIE_ECAM region is disabled when firmware is loaded
 on 32-bits system.

(4) One specific high memory region is disabled when it breaks the
 PA space limit.

The current implementation of virt_set_memmap() isn't comprehensive
because the space for one specific high memory region is always
reserved from the PA space for case (1), (2) and (3).

I would suggest:
isn't optimized because the high memory region PA range is always

reserved whatever the actual state of the corresponding vms->highmem_* flag.



Ok. I will have something like below in next revision.

  The current implementation of virt_set_{memmap, high_memmap}() isn't
  optimized because the high memory region's PA space is always reserved,
  regardless of the actual state of the corresponding
  vms->highmem_{redists, ecam, mmio} flag. In the code, 


  In the code,
'base' and 'vms->highest_gpa' are always increased for those three
cases. It's unnecessary since the assigned space of the disabled
high memory region won't be used afterwards.

This improves the address assignment for those three high memory

s/This improves/Improve


Ok.


region by skipping the address assignment for one specific high
memory region if it has been disabled in case (1), (2) and (3).
'vms->high_compact' is false for now, meaning that we don't have

s/hight_compat/highmem_compact

You also may justify the introduction of this new field.


Thanks. It should be 'highmem_compact'. Yes, it makes sense to
justify the addition of 'vms->highmem_compact'. I will have something
like below in next revision.

  The memory layout may be changed after the improvement is applied, which
  leads to potential migration breakage. So 'vms->highmem_compact' is added
  to control if the improvement should be applied. For now, 
'vms->highmem_compact'
  is set to false, meaning that we don't have memory layout change until it
  becomes configurable through property 'compact-highmem' in next patch.


any behavior changes until it becomes configurable through property
'compact-highmem' in next patch.

Signed-off-by: Gavin Shan 

Tested-by: Zhenyu Zhang 
---
  hw/arm/virt.c | 23 +++
  include/hw/arm/virt.h |  1 +
  2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ee98a8a3b6..c05cfb5314 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1717,22 +1717,29 @@ static void virt_set_high_memmap(VirtMachineState *vms,
  region_base = ROUND_UP(base, extended_memmap[i].size);
  region_size = extended_memmap[i].size;
  
-vms->memmap[i].base = region_base;

-vms->memmap[i].size = region_size;
-
  /*
   * Check each device to see if they fit in the PA space,

while we are at it, you can change s/they fit/it fits


Agreed.


- * moving highest_gpa as we go.
+ * moving highest_gpa as we go. For compatibility, move
+ * highest_gpa for disabled fitting devices as well, if
+ * the compact layout has been disabled.
   *
   * For each device that doesn't fit, disable it.
   */
  fits = (region_base + region_size) <= BIT_ULL(pa_bits);
-if (fits) {
+if (*region_enabled && fits) {
+vms->memmap[i].base = region_base;
+vms->memmap[i].size = region_size;
  vms->highest_gpa = region_base + region_size - 1;
+base = region_base + region_size;
+} else {
+*region_enabled = false;
+if (!vms->highmem_compact) {
+base = region_base + region_size;
+if (fits) {
+vms->highest_gpa = region_base + region_size - 1;
+}
+}
  }
-
-*region_enabled &= fits;
-base = region_base + region_size;
  }
  }

This looks quite complicated to me. It is not obvious, for instance, that we
have the same code as before when highmem_compact is not set. Typically,
vms->memmap[i].base/size are not always set as they were before, and the impact
on the rest of the code must be double-checked.

Could this be rewritten in that way (pseudocode totally untested).


static void fit_highmem_slot(vms, *base, i, pa_bits)
{
     region_enabled = 

[PATCH 5/8] Hexagon (target/hexagon) Add overrides for compound compare and jump

2022-10-19 Thread Taylor Simpson
Signed-off-by: Taylor Simpson 
---
 target/hexagon/gen_tcg.h | 177 +++
 target/hexagon/genptr.c  |  74 
 2 files changed, 251 insertions(+)

diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index ad149adbe1..b56b216110 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -620,6 +620,183 @@
 #define fGEN_TCG_J2_callf(SHORTCODE) \
 gen_cond_call(ctx, pkt, PuV, false, riV)
 
+/*
+ * Compound compare and jump instructions
+ * Here is a primer to understand the tag names
+ *
+ * Comparison
+ *  cmpeqi   compare equal to an immediate
+ *  cmpgti   compare greater than an immediate
+ *  cmpgtiu  compare greater than an unsigned immediate
+ *  cmpeqn1  compare equal to negative 1
+ *  cmpgtn1  compare greater than negative 1
+ *  cmpeqcompare equal (two registers)
+ *  cmpgtu   compare greater than unsigned (two registers)
+ *  tstbit0  test bit zero
+ *
+ * Condition
+ *  tp0  p0 is true p0 = cmp.eq(r0,#5); if (p0.new) jump:nt address
+ *  fp0  p0 is falsep0 = cmp.eq(r0,#5); if (!p0.new) jump:nt 
address
+ *  tp1  p1 is true p1 = cmp.eq(r0,#5); if (p1.new) jump:nt address
+ *  fp1  p1 is falsep1 = cmp.eq(r0,#5); if (!p1.new) jump:nt 
address
+ *
+ * Prediction (not modelled in qemu)
+ *  _nt  not taken
+ *  _t   taken
+ */
+#define fGEN_TCG_J4_cmpeq_tp0_jump_t(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 0, TCG_COND_EQ, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_tp0_jump_nt(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 0, TCG_COND_EQ, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_fp0_jump_t(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 0, TCG_COND_EQ, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_fp0_jump_nt(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 0, TCG_COND_EQ, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_tp1_jump_t(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 1, TCG_COND_EQ, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_tp1_jump_nt(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 1, TCG_COND_EQ, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_fp1_jump_t(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 1, TCG_COND_EQ, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_fp1_jump_nt(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 1, TCG_COND_EQ, false, RsV, RtV, riV)
+
+#define fGEN_TCG_J4_cmpgt_tp0_jump_t(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 0, TCG_COND_GT, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_tp0_jump_nt(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 0, TCG_COND_GT, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_fp0_jump_t(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 0, TCG_COND_GT, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_fp0_jump_nt(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 0, TCG_COND_GT, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_tp1_jump_t(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 1, TCG_COND_GT, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_tp1_jump_nt(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 1, TCG_COND_GT, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_fp1_jump_t(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 1, TCG_COND_GT, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_fp1_jump_nt(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 1, TCG_COND_GT, false, RsV, RtV, riV)
+
+#define fGEN_TCG_J4_cmpgtu_tp0_jump_t(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 0, TCG_COND_GTU, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_tp0_jump_nt(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 0, TCG_COND_GTU, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_fp0_jump_t(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 0, TCG_COND_GTU, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_fp0_jump_nt(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 0, TCG_COND_GTU, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_tp1_jump_t(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 1, TCG_COND_GTU, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_tp1_jump_nt(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 1, TCG_COND_GTU, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_fp1_jump_t(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 1, TCG_COND_GTU, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_fp1_jump_nt(SHORTCODE) \
+gen_cmpnd_cmp_jmp(ctx, pkt, insn, 1, TCG_COND_GTU, false, RsV, RtV, riV)
+
+#define fGEN_TCG_J4_cmpeqi_tp0_jump_t(SHORTCODE) \
+gen_cmpnd_cmpi_jmp(ctx, pkt, insn, 0, TCG_COND_EQ, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_tp0_jump_nt(SHORTCODE) \
+gen_cmpnd_cmpi_jmp(ctx, pkt, insn, 0, TCG_COND_EQ, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_fp0_jump_t(SHORTCODE) \
+gen_cmpnd_cmpi_jmp(ctx, pkt, insn, 0, TCG_COND_EQ, false, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_fp0_jump_nt(SHORTCODE) \
+gen_cmpnd_cmpi_jmp(ctx, pkt, 

[PATCH 0/8] Hexagon (target/hexagon) Improve change-of-flow

2022-10-19 Thread Taylor Simpson
This patch series improves change-of-flow handling.

Currently, we set the PC to a new address before exiting a TB.  The
ultimate goal is to use direct block chaining.  However, several steps
are needed along the way.

1)
When a packet has more than one change-of-flow (COF) instruction, only
the first one taken is considered.  The runtime bookkeeping is only
needed when there is more than one COF instruction in a packet.

2, 3)
Remove PC and next_PC from the runtime state and always use a
translation-time constant.  Note that next_PC is used by call instructions
to set LR and by conditional COF instructions to set the fall-through
address.

4, 5, 6)
Add helper overrides for COF instructions.  In particular, we must
distinguish those that use a PC-relative address for the destination.
These are candidates for direct block chaining later.

7)
Use direct block chaining for packets that have a single PC-relative
COF instruction.  Instead of generating the code while processing the
instruction, we record the effect in DisasContext and generate the code
during gen_end_tb.

8)
Use direct block chaining for tight loops.  We look for TBs that end
with an endloop0 that will branch back to the TB start address.
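
For context, the usual TCG idiom for direct block chaining that gen_end_tb can
use looks roughly like this (a sketch only, not code from this series; the use
of translator_use_goto_tb() and the exact PC write are assumptions about the
final shape):

static void gen_end_tb_goto(DisasContext *ctx, int idx, target_ulong dest)
{
    if (translator_use_goto_tb(&ctx->base, dest)) {
        /* Emit a patchable direct jump that chains to the next TB. */
        tcg_gen_goto_tb(idx);
        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
        tcg_gen_exit_tb(ctx->base.tb, idx);
    } else {
        /* Cross-page or single-step case: indirect lookup of the next TB. */
        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
        tcg_gen_lookup_and_goto_ptr();
    }
}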




Taylor Simpson (8):
  Hexagon (target/hexagon) Only use branch_taken when packet has multi
cof
  Hexagon (target/hexagon) Remove PC from the runtime state
  Hexagon (target/hexagon) Remove next_PC from runtime state
  Hexagon (target/hexagon) Add overrides for direct call instructions
  Hexagon (target/hexagon) Add overrides for compound compare and jump
  Hexagon (target/hexagon) Add overrides for various forms of jump
  Hexagon (target/hexagon) Use direct block chaining for direct
jump/branch
  Hexagon (target/hexagon) Use direct block chaining for tight loops

 target/hexagon/cpu.h|  18 +-
 target/hexagon/gen_tcg.h| 391 
 target/hexagon/insn.h   |   2 +
 target/hexagon/macros.h |   6 +-
 target/hexagon/translate.h  |   6 +-
 target/hexagon/decode.c |  15 +-
 target/hexagon/genptr.c | 278 
 target/hexagon/op_helper.c  |  28 +-
 target/hexagon/translate.c  | 120 +++--
 target/hexagon/gen_helper_funcs.py  |  11 +
 target/hexagon/gen_helper_protos.py |  12 +-
 target/hexagon/gen_tcg_funcs.py |  11 +
 target/hexagon/hex_common.py|  34 ++-
 13 files changed, 887 insertions(+), 45 deletions(-)

-- 
2.17.1



[PATCH 4/8] Hexagon (target/hexagon) Add overrides for direct call instructions

2022-10-19 Thread Taylor Simpson
Add overrides for
J2_call
J2_callt
J2_callf

Signed-off-by: Taylor Simpson 
---
 target/hexagon/gen_tcg.h |  8 ++
 target/hexagon/genptr.c  | 59 
 2 files changed, 67 insertions(+)

diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index e6fc7d97d2..ad149adbe1 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -612,6 +612,14 @@
 tcg_temp_free(tmp); \
 } while (0)
 
+#define fGEN_TCG_J2_call(SHORTCODE) \
+gen_call(ctx, pkt, riV)
+
+#define fGEN_TCG_J2_callt(SHORTCODE) \
+gen_cond_call(ctx, pkt, PuV, true, riV)
+#define fGEN_TCG_J2_callf(SHORTCODE) \
+gen_cond_call(ctx, pkt, PuV, false, riV)
+
 #define fGEN_TCG_J2_pause(SHORTCODE) \
 do { \
 uiV = uiV; \
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 806d0974ff..4b43b3f5c8 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -456,6 +456,65 @@ static TCGv gen_8bitsof(TCGv result, TCGv value)
 return result;
 }
 
+static void gen_write_new_pc_pcrel(DisasContext *ctx, Packet *pkt,
+   int pc_off, TCGv pred)
+{
+target_ulong dest = pkt->pc + pc_off;
+if (pkt->pkt_has_multi_cof) {
+TCGLabel *pred_false = NULL;
+if (pred != NULL) {
+pred_false = gen_new_label();
+tcg_gen_brcondi_tl(TCG_COND_EQ, pred, 0, pred_false);
+}
+/* If there are multiple branches in a packet, ignore the second one */
+TCGLabel *skip = gen_new_label();
+tcg_gen_brcondi_tl(TCG_COND_NE, hex_branch_taken, 0, skip);
+tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
+tcg_gen_movi_tl(hex_branch_taken, 1);
+gen_set_label(skip);
+if (pred != NULL) {
+gen_set_label(pred_false);
+}
+} else {
+TCGLabel *pred_false = NULL;
+if (pred != NULL) {
+pred_false = gen_new_label();
+tcg_gen_brcondi_tl(TCG_COND_EQ, pred, 0, pred_false);
+}
+tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
+if (pred != NULL) {
+gen_set_label(pred_false);
+}
+}
+}
+
+static void gen_call(DisasContext *ctx, Packet *pkt, int pc_off)
+{
+TCGv next_PC =
+tcg_constant_tl(pkt->pc + pkt->encod_pkt_size_in_bytes);
+gen_log_reg_write(HEX_REG_LR, next_PC);
+gen_write_new_pc_pcrel(ctx, pkt, pc_off, NULL);
+}
+
+static void gen_cond_call(DisasContext *ctx, Packet *pkt,
+  TCGv pred, bool sense, int pc_off)
+{
+TCGv next_PC;
+TCGv lsb = tcg_temp_local_new();
+TCGLabel *skip = gen_new_label();
+tcg_gen_andi_tl(lsb, pred, 1);
+if (!sense) {
+tcg_gen_xori_tl(lsb, lsb, 1);
+}
+gen_write_new_pc_pcrel(ctx, pkt, pc_off, lsb);
+tcg_gen_brcondi_tl(TCG_COND_EQ, lsb, 0, skip);
+tcg_temp_free(lsb);
+next_PC =
+tcg_constant_tl(pkt->pc + pkt->encod_pkt_size_in_bytes);
+gen_log_reg_write(HEX_REG_LR, next_PC);
+gen_set_label(skip);
+}
+
 static intptr_t vreg_src_off(DisasContext *ctx, int num)
 {
 intptr_t offset = offsetof(CPUHexagonState, VRegs[num]);
-- 
2.17.1



[PATCH 6/8] Hexagon (target/hexagon) Add overrides for various forms of jump

2022-10-19 Thread Taylor Simpson
Signed-off-by: Taylor Simpson 
---
 target/hexagon/gen_tcg.h | 190 +++
 target/hexagon/genptr.c  |  75 
 2 files changed, 265 insertions(+)

diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index b56b216110..dbafcae2de 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -797,6 +797,196 @@
 #define fGEN_TCG_J4_tstbit0_fp1_jump_t(SHORTCODE) \
 gen_cmpnd_tstbit0_jmp(ctx, pkt, insn, 1, false, RsV, riV)
 
+#define fGEN_TCG_J2_jump(SHORTCODE) \
+gen_jump(ctx, pkt, riV)
+#define fGEN_TCG_J2_jumpr(SHORTCODE) \
+gen_jumpr(ctx, pkt, RsV)
+#define fGEN_TCG_J4_jumpseti(SHORTCODE) \
+do { \
+tcg_gen_movi_tl(RdV, UiV); \
+gen_jump(ctx, pkt, riV); \
+} while (0)
+
+#define fGEN_TCG_cond_jump(COND) \
+do { \
+TCGv LSB = tcg_temp_new(); \
+COND; \
+gen_cond_jump(ctx, pkt, LSB, riV); \
+tcg_temp_free(LSB); \
+} while (0)
+
+#define fGEN_TCG_J2_jumpt(SHORTCODE) \
+fGEN_TCG_cond_jump(fLSBOLD(PuV))
+#define fGEN_TCG_J2_jumptpt(SHORTCODE) \
+fGEN_TCG_cond_jump(fLSBOLD(PuV))
+#define fGEN_TCG_J2_jumpf(SHORTCODE) \
+fGEN_TCG_cond_jump(fLSBOLDNOT(PuV))
+#define fGEN_TCG_J2_jumpfpt(SHORTCODE) \
+fGEN_TCG_cond_jump(fLSBOLDNOT(PuV))
+#define fGEN_TCG_J2_jumptnew(SHORTCODE) \
+gen_cond_jump(ctx, pkt, PuN, riV)
+#define fGEN_TCG_J2_jumptnewpt(SHORTCODE) \
+gen_cond_jump(ctx, pkt, PuN, riV)
+#define fGEN_TCG_J2_jumpfnewpt(SHORTCODE) \
+fGEN_TCG_cond_jump(fLSBNEWNOT(PuN))
+#define fGEN_TCG_J2_jumpfnew(SHORTCODE) \
+fGEN_TCG_cond_jump(fLSBNEWNOT(PuN))
+#define fGEN_TCG_J2_jumprz(SHORTCODE) \
+fGEN_TCG_cond_jump(tcg_gen_setcondi_tl(TCG_COND_NE, LSB, RsV, 0))
+#define fGEN_TCG_J2_jumprzpt(SHORTCODE) \
+fGEN_TCG_cond_jump(tcg_gen_setcondi_tl(TCG_COND_NE, LSB, RsV, 0))
+#define fGEN_TCG_J2_jumprnz(SHORTCODE) \
+fGEN_TCG_cond_jump(tcg_gen_setcondi_tl(TCG_COND_EQ, LSB, RsV, 0))
+#define fGEN_TCG_J2_jumprnzpt(SHORTCODE) \
+fGEN_TCG_cond_jump(tcg_gen_setcondi_tl(TCG_COND_EQ, LSB, RsV, 0))
+#define fGEN_TCG_J2_jumprgtez(SHORTCODE) \
+fGEN_TCG_cond_jump(tcg_gen_setcondi_tl(TCG_COND_GE, LSB, RsV, 0))
+#define fGEN_TCG_J2_jumprgtezpt(SHORTCODE) \
+fGEN_TCG_cond_jump(tcg_gen_setcondi_tl(TCG_COND_GE, LSB, RsV, 0))
+#define fGEN_TCG_J2_jumprltez(SHORTCODE) \
+fGEN_TCG_cond_jump(tcg_gen_setcondi_tl(TCG_COND_LE, LSB, RsV, 0))
+#define fGEN_TCG_J2_jumprltezpt(SHORTCODE) \
+fGEN_TCG_cond_jump(tcg_gen_setcondi_tl(TCG_COND_LE, LSB, RsV, 0))
+
+#define fGEN_TCG_cond_jumpr(COND) \
+do { \
+TCGv LSB = tcg_temp_new(); \
+COND; \
+gen_cond_jumpr(ctx, pkt, LSB, RsV); \
+tcg_temp_free(LSB); \
+} while (0)
+
+#define fGEN_TCG_J2_jumprt(SHORTCODE) \
+fGEN_TCG_cond_jumpr(fLSBOLD(PuV))
+#define fGEN_TCG_J2_jumprtpt(SHORTCODE) \
+fGEN_TCG_cond_jumpr(fLSBOLD(PuV))
+#define fGEN_TCG_J2_jumprf(SHORTCODE) \
+fGEN_TCG_cond_jumpr(fLSBOLDNOT(PuV))
+#define fGEN_TCG_J2_jumprfpt(SHORTCODE) \
+fGEN_TCG_cond_jumpr(fLSBOLDNOT(PuV))
+#define fGEN_TCG_J2_jumprtnew(SHORTCODE) \
+fGEN_TCG_cond_jumpr(fLSBNEW(PuN))
+#define fGEN_TCG_J2_jumprtnewpt(SHORTCODE) \
+fGEN_TCG_cond_jumpr(fLSBNEW(PuN))
+#define fGEN_TCG_J2_jumprfnew(SHORTCODE) \
+fGEN_TCG_cond_jumpr(fLSBNEWNOT(PuN))
+#define fGEN_TCG_J2_jumprfnewpt(SHORTCODE) \
+fGEN_TCG_cond_jumpr(fLSBNEWNOT(PuN))
+#define fGEN_TCG_J2_jumprfnewpt(SHORTCODE) \
+fGEN_TCG_cond_jumpr(fLSBNEWNOT(PuN))
+
+/*
+ * New value compare & jump instructions
+ * if ([!]COND(r0.new, r1) jump:t address
+ * if ([!]COND(r0.new, #7) jump:t address
+ */
+#define fGEN_TCG_J4_cmpgt_t_jumpnv_t(SHORTCODE) \
+gen_cmp_jumpnv(ctx, pkt, TCG_COND_GT, NsN, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_t_jumpnv_nt(SHORTCODE) \
+gen_cmp_jumpnv(ctx, pkt, TCG_COND_GT, NsN, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_f_jumpnv_t(SHORTCODE) \
+gen_cmp_jumpnv(ctx, pkt, TCG_COND_LE, NsN, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_f_jumpnv_nt(SHORTCODE) \
+gen_cmp_jumpnv(ctx, pkt, TCG_COND_LE, NsN, RtV, riV)
+
+#define fGEN_TCG_J4_cmpeq_t_jumpnv_t(SHORTCODE) \
+gen_cmp_jumpnv(ctx, pkt, TCG_COND_EQ, NsN, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_t_jumpnv_nt(SHORTCODE) \
+gen_cmp_jumpnv(ctx, pkt, TCG_COND_EQ, NsN, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_f_jumpnv_t(SHORTCODE) \
+gen_cmp_jumpnv(ctx, pkt, TCG_COND_NE, NsN, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_f_jumpnv_nt(SHORTCODE) \
+gen_cmp_jumpnv(ctx, pkt, TCG_COND_NE, NsN, RtV, riV)
+
+#define fGEN_TCG_J4_cmplt_f_jumpnv_t(SHORTCODE) \
+gen_cmp_jumpnv(ctx, pkt, TCG_COND_GE, NsN, RtV, riV)
+#define fGEN_TCG_J4_cmplt_f_jumpnv_nt(SHORTCODE) \
+gen_cmp_jumpnv(ctx, pkt, TCG_COND_GE, NsN, RtV, riV)
+
+#define fGEN_TCG_J4_cmpeqi_t_jumpnv_t(SHORTCODE) \
+gen_cmpi_jumpnv(ctx, pkt, TCG_COND_EQ, NsN, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_t_jumpnv_nt(SHORTCODE) \
+gen_cmpi_jumpnv(ctx, pkt, TCG_COND_EQ, NsN, UiV, riV)
+#define 

[PATCH 7/8] Hexagon (target/hexagon) Use direct block chaining for direct jump/branch

2022-10-19 Thread Taylor Simpson
Direct block chaining is documented here
https://qemu.readthedocs.io/en/latest/devel/tcg.html#direct-block-chaining

Recall that Hexagon allows packets with multiple jumps where only the first
one with a true predicate will actually jump.  So, we can only use direct
block chaining when the packet contains a single PC-relative jump.  We add
the following to DisasContext in order to perform direct block chaining at
the end of packet commit (in gen_end_tb):
    has_single_direct_branch
        Indicates that we can use direct block chaining
    branch_cond
        The condition under which the branch is taken
    branch_dest
        The destination of the branch

Signed-off-by: Taylor Simpson 
---
 target/hexagon/translate.h |  3 +++
 target/hexagon/genptr.c| 13 ++---
 target/hexagon/translate.c | 39 +-
 3 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index eae358cf33..e60dbf0e7a 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -54,6 +54,9 @@ typedef struct DisasContext {
 bool qreg_is_predicated[NUM_QREGS];
 int qreg_log_idx;
 bool pre_commit;
+bool has_single_direct_branch;
+TCGv branch_cond;
+target_ulong branch_dest;
 } DisasContext;
 
 static inline void ctx_log_reg_write(DisasContext *ctx, int rnum)
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index fba76d3b38..07b4326e56 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -505,15 +505,14 @@ static void gen_write_new_pc_pcrel(DisasContext *ctx, Packet *pkt,
 gen_set_label(pred_false);
 }
 } else {
-TCGLabel *pred_false = NULL;
+/* Defer this jump to the end of the TB */
+g_assert(ctx->branch_cond == NULL);
+ctx->has_single_direct_branch = true;
 if (pred != NULL) {
-pred_false = gen_new_label();
-tcg_gen_brcondi_tl(TCG_COND_EQ, pred, 0, pred_false);
-}
-tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
-if (pred != NULL) {
-gen_set_label(pred_false);
+ctx->branch_cond = tcg_temp_local_new();
+tcg_gen_mov_tl(ctx->branch_cond, pred);
 }
+ctx->branch_dest = dest;
 }
 }
 
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 71ad2da682..29e2caaf0f 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -116,10 +116,44 @@ static void gen_exec_counters(DisasContext *ctx)
 hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
 }
 
+static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
+{
+return translator_use_goto_tb(>base, dest);
+}
+
+static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest)
+{
+if (use_goto_tb(ctx, dest)) {
+tcg_gen_goto_tb(idx);
+tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
+tcg_gen_exit_tb(ctx->base.tb, idx);
+} else {
+tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
+tcg_gen_lookup_and_goto_ptr();
+}
+}
+
 static void gen_end_tb(DisasContext *ctx)
 {
 gen_exec_counters(ctx);
-tcg_gen_exit_tb(NULL, 0);
+
+if (ctx->has_single_direct_branch) {
+if (ctx->branch_cond != NULL) {
+TCGLabel *skip = gen_new_label();
+tcg_gen_brcondi_tl(TCG_COND_EQ, ctx->branch_cond, 0, skip);
+gen_goto_tb(ctx, 0, ctx->branch_dest);
+gen_set_label(skip);
+gen_goto_tb(ctx, 1, ctx->next_PC);
+tcg_temp_free(ctx->branch_cond);
+ctx->branch_cond = NULL;
+} else {
+gen_goto_tb(ctx, 0, ctx->branch_dest);
+}
+} else {
+tcg_gen_lookup_and_goto_ptr();
+}
+
+g_assert(ctx->branch_cond == NULL);
 ctx->base.is_jmp = DISAS_NORETURN;
 }
 
@@ -803,6 +837,9 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
 
 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
 {
+DisasContext *ctx = container_of(db, DisasContext, base);
+ctx->has_single_direct_branch = false;
+ctx->branch_cond = NULL;
 }
 
 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
-- 
2.17.1



[PATCH 8/8] Hexagon (target/hexagon) Use direct block chaining for tight loops

2022-10-19 Thread Taylor Simpson
Direct block chaining is documented here
https://qemu.readthedocs.io/en/latest/devel/tcg.html#direct-block-chaining

Hexagon inner loops end with the endloop0 instruction.
To go back to the beginning of the loop, this instruction writes to PC
from register SA0 (start address 0).  To use direct block chaining, we
have to assign PC with a constant value.  So, we specialize the code
generation when the start of the translation block is equal to SA0.

When this is the case, we defer the compare/branch from endloop0 to
gen_end_tb.  When this is done, we can assign the start address of the TB
to PC.

Signed-off-by: Taylor Simpson 
---
 target/hexagon/cpu.h   | 17 ++---
 target/hexagon/gen_tcg.h   |  3 ++
 target/hexagon/translate.h |  1 +
 target/hexagon/genptr.c| 71 ++
 target/hexagon/translate.c | 41 +++---
 5 files changed, 124 insertions(+), 9 deletions(-)

diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index ff8c26272d..5260e0f127 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -152,16 +152,23 @@ struct ArchCPU {
 
 #include "cpu_bits.h"
 
+typedef union {
+uint32_t i;
+struct {
+bool is_tight_loop:1;
+};
+} HexStateFlags;
+
 static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, target_ulong *pc,
 target_ulong *cs_base, uint32_t *flags)
 {
+HexStateFlags hex_flags = { 0 };
 *pc = env->gpr[HEX_REG_PC];
 *cs_base = 0;
-#ifdef CONFIG_USER_ONLY
-*flags = 0;
-#else
-#error System mode not supported on Hexagon yet
-#endif
+if (*pc == env->gpr[HEX_REG_SA0]) {
+hex_flags.is_tight_loop = true;
+}
+*flags = hex_flags.i;
 }
 
 static inline int cpu_mmu_index(CPUHexagonState *env, bool ifetch)
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index dbafcae2de..d9c345801b 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -620,6 +620,9 @@
 #define fGEN_TCG_J2_callf(SHORTCODE) \
 gen_cond_call(ctx, pkt, PuV, false, riV)
 
+#define fGEN_TCG_J2_endloop0(SHORTCODE) \
+gen_endloop0(ctx, pkt)
+
 /*
  * Compound compare and jump instructions
  * Here is a primer to understand the tag names
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index e60dbf0e7a..34abe86b5c 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -57,6 +57,7 @@ typedef struct DisasContext {
 bool has_single_direct_branch;
 TCGv branch_cond;
 target_ulong branch_dest;
+bool is_tight_loop;
 } DisasContext;
 
 static inline void ctx_log_reg_write(DisasContext *ctx, int rnum)
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 07b4326e56..252ed52d1b 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -516,6 +516,20 @@ static void gen_write_new_pc_pcrel(DisasContext *ctx, Packet *pkt,
 }
 }
 
+static void gen_set_usr_field(int field, TCGv val)
+{
+tcg_gen_deposit_tl(hex_new_value[HEX_REG_USR], hex_new_value[HEX_REG_USR],
+   val,
+   reg_field_info[field].offset,
+   reg_field_info[field].width);
+}
+
+static void gen_set_usr_fieldi(int field, int x)
+{
+TCGv val = tcg_constant_tl(x);
+gen_set_usr_field(field, val);
+}
+
 static void gen_compare(TCGCond cond, TCGv res, TCGv arg1, TCGv arg2)
 {
 TCGv one = tcg_constant_tl(0xff);
@@ -645,6 +659,63 @@ static void gen_cond_call(DisasContext *ctx, Packet *pkt,
 gen_set_label(skip);
 }
 
+static void gen_endloop0(DisasContext *ctx, Packet *pkt)
+{
+TCGv lpcfg = tcg_temp_local_new();
+
+GET_USR_FIELD(USR_LPCFG, lpcfg);
+
+/*
+ *if (lpcfg == 1) {
+ *hex_new_pred_value[3] = 0xff;
+ *hex_pred_written |= 1 << 3;
+ *}
+ */
+TCGLabel *label1 = gen_new_label();
+tcg_gen_brcondi_tl(TCG_COND_NE, lpcfg, 1, label1);
+{
+tcg_gen_movi_tl(hex_new_pred_value[3], 0xff);
+tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << 3);
+}
+gen_set_label(label1);
+
+/*
+ *if (lpcfg) {
+ *SET_USR_FIELD(USR_LPCFG, lpcfg - 1);
+ *}
+ */
+TCGLabel *label2 = gen_new_label();
+tcg_gen_brcondi_tl(TCG_COND_EQ, lpcfg, 0, label2);
+{
+tcg_gen_subi_tl(lpcfg, lpcfg, 1);
+SET_USR_FIELD(USR_LPCFG, lpcfg);
+}
+gen_set_label(label2);
+
+/*
+ * If we're in a tight loop, we'll do this at the end of the TB to take
+ * advantage of direct block chaining.
+ */
+if (!ctx->is_tight_loop) {
+/*
+ *if (hex_gpr[HEX_REG_LC0] > 1) {
+ *PC = hex_gpr[HEX_REG_SA0];
+ *hex_new_value[HEX_REG_LC0] = hex_gpr[HEX_REG_LC0] - 1;
+ *}
+ */
+TCGLabel *label3 = gen_new_label();
+tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, label3);
+{
+gen_jumpr(ctx, pkt, 

[PATCH 1/8] Hexagon (target/hexagon) Only use branch_taken when packet has multi cof

2022-10-19 Thread Taylor Simpson
When a packet has more than one change-of-flow instruction, only the first
one to branch is considered.  We use the branch_taken variable to keep
track of this.

However, when there is a single cof instruction, we don't need the same
amount of bookkeeping.

We add the pkt_has_multi_cof member to the Packet structure, and pass this
information to the needed functions.

When there is a generated helper function with cof, the generator will
pass this pkt_has_multi_cof as a runtime value.

Signed-off-by: Taylor Simpson 
---
 target/hexagon/insn.h   |  1 +
 target/hexagon/macros.h |  2 +-
 target/hexagon/decode.c | 15 +--
 target/hexagon/op_helper.c  | 24 +++-
 target/hexagon/translate.c  |  4 +++-
 target/hexagon/gen_helper_funcs.py  |  3 +++
 target/hexagon/gen_helper_protos.py |  6 +-
 target/hexagon/gen_tcg_funcs.py |  5 +
 target/hexagon/hex_common.py|  8 
 9 files changed, 54 insertions(+), 14 deletions(-)

diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h
index aa26389147..857a7ceb75 100644
--- a/target/hexagon/insn.h
+++ b/target/hexagon/insn.h
@@ -60,6 +60,7 @@ struct Packet {
 
 /* Pre-decodes about COF */
 bool pkt_has_cof;  /* Has any change-of-flow */
+bool pkt_has_multi_cof;/* Has more than one change-of-flow */
 bool pkt_has_endloop;
 
 bool pkt_has_dczeroa;
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index c8805bdaeb..e908405d82 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -407,7 +407,7 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
 
 #define fCHECK_PCALIGN(A)
 
-#define fWRITE_NPC(A) write_new_pc(env, A)
+#define fWRITE_NPC(A) write_new_pc(env, pkt_has_multi_cof != 0, A)
 
 #define fBRANCH(LOC, TYPE)  fWRITE_NPC(LOC)
 #define fJUMPR(REGNO, TARGET, TYPE) fBRANCH(TARGET, COF_TYPE_JUMPR)
diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c
index 6b73b5c60c..041c8de751 100644
--- a/target/hexagon/decode.c
+++ b/target/hexagon/decode.c
@@ -388,6 +388,7 @@ static void decode_set_insn_attr_fields(Packet *pkt)
 uint16_t opcode;
 
 pkt->pkt_has_cof = false;
+pkt->pkt_has_multi_cof = false;
 pkt->pkt_has_endloop = false;
 pkt->pkt_has_dczeroa = false;
 
@@ -412,13 +413,23 @@ static void decode_set_insn_attr_fields(Packet *pkt)
 }
 }
 
-pkt->pkt_has_cof |= decode_opcode_can_jump(opcode);
+if (decode_opcode_can_jump(opcode)) {
+if (pkt->pkt_has_cof) {
+pkt->pkt_has_multi_cof = true;
+}
+pkt->pkt_has_cof = true;
+}
 
 pkt->insn[i].is_endloop = decode_opcode_ends_loop(opcode);
 
 pkt->pkt_has_endloop |= pkt->insn[i].is_endloop;
 
-pkt->pkt_has_cof |= pkt->pkt_has_endloop;
+if (pkt->pkt_has_endloop) {
+if (pkt->pkt_has_cof) {
+pkt->pkt_has_multi_cof = true;
+}
+pkt->pkt_has_cof = true;
+}
 }
 }
 
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 085afc3274..84391e25eb 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -104,20 +104,26 @@ static void log_store64(CPUHexagonState *env, target_ulong addr,
 env->mem_log_stores[slot].data64 = val;
 }
 
-static void write_new_pc(CPUHexagonState *env, target_ulong addr)
+static void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof,
+ target_ulong addr)
 {
 HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr);
 
-/*
- * If more than one branch is taken in a packet, only the first one
- * is actually done.
- */
-if (env->branch_taken) {
-HEX_DEBUG_LOG("INFO: multiple branches taken in same packet, "
-  "ignoring the second one\n");
+if (pkt_has_multi_cof) {
+/*
+ * If more than one branch is taken in a packet, only the first one
+ * is actually done.
+ */
+if (env->branch_taken) {
+HEX_DEBUG_LOG("INFO: multiple branches taken in same packet, "
+  "ignoring the second one\n");
+} else {
+fCHECK_PCALIGN(addr);
+env->next_PC = addr;
+env->branch_taken = 1;
+}
 } else {
 fCHECK_PCALIGN(addr);
-env->branch_taken = 1;
 env->next_PC = addr;
 }
 }
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 2329177537..2e46cc0680 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -247,7 +247,9 @@ static void gen_start_packet(DisasContext *ctx, Packet *pkt)
 tcg_gen_movi_tl(hex_slot_cancelled, 0);
 }
 if (pkt->pkt_has_cof) {
-tcg_gen_movi_tl(hex_branch_taken, 0);
+if (pkt->pkt_has_multi_cof) {
+tcg_gen_movi_tl(hex_branch_taken, 0);
+}
   

[PATCH 3/8] Hexagon (target/hexagon) Remove next_PC from runtime state

2022-10-19 Thread Taylor Simpson
The imported files don't properly mark all CONDEXEC instructions, so
we add some logic to hex_common.py to add the attribute.

Signed-off-by: Taylor Simpson 
---
 target/hexagon/cpu.h|  1 -
 target/hexagon/gen_tcg.h|  6 ++
 target/hexagon/macros.h |  2 +-
 target/hexagon/translate.h  |  2 +-
 target/hexagon/op_helper.c  |  6 +++---
 target/hexagon/translate.c  | 27 +--
 target/hexagon/gen_helper_funcs.py  |  4 
 target/hexagon/gen_helper_protos.py |  3 +++
 target/hexagon/gen_tcg_funcs.py |  3 +++
 target/hexagon/hex_common.py| 20 
 10 files changed, 62 insertions(+), 12 deletions(-)

diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 2a65a57bab..ff8c26272d 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -78,7 +78,6 @@ typedef struct CPUArchState {
 target_ulong gpr[TOTAL_PER_THREAD_REGS];
 target_ulong pred[NUM_PREGS];
 target_ulong branch_taken;
-target_ulong next_PC;
 
 /* For comparing with LLDB on target - see adjust_stack_ptrs function */
 target_ulong last_pc_dumped;
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 7f0ba27eb6..e6fc7d97d2 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -612,6 +612,12 @@
 tcg_temp_free(tmp); \
 } while (0)
 
+#define fGEN_TCG_J2_pause(SHORTCODE) \
+do { \
+uiV = uiV; \
+tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC); \
+} while (0)
+
 /* Floating point */
 #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \
 gen_helper_conv_sf2df(RddV, cpu_env, RsV)
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index 469dfa5571..2fc549c37e 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -400,7 +400,7 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
 #endif
 #define fREAD_PC() (PC)
 
-#define fREAD_NPC() (env->next_PC & (0xfffe))
+#define fREAD_NPC() (next_PC & (0xfffe))
 
 #define fREAD_P0() (READ_PREG(0))
 #define fREAD_P3() (READ_PREG(3))
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index a245172827..eae358cf33 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -27,6 +27,7 @@
 
 typedef struct DisasContext {
 DisasContextBase base;
+uint32_t next_PC;
 uint32_t mem_idx;
 uint32_t num_packets;
 uint32_t num_insns;
@@ -125,7 +126,6 @@ static inline void ctx_log_qreg_write(DisasContext *ctx,
 
 extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
 extern TCGv hex_pred[NUM_PREGS];
-extern TCGv hex_next_PC;
 extern TCGv hex_this_PC;
 extern TCGv hex_slot_cancelled;
 extern TCGv hex_branch_taken;
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 84391e25eb..aad0195eb6 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -119,12 +119,12 @@ static void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof,
   "ignoring the second one\n");
 } else {
 fCHECK_PCALIGN(addr);
-env->next_PC = addr;
+env->gpr[HEX_REG_PC] = addr;
 env->branch_taken = 1;
 }
 } else {
 fCHECK_PCALIGN(addr);
-env->next_PC = addr;
+env->gpr[HEX_REG_PC] = addr;
 }
 }
 
@@ -299,7 +299,7 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
 }
 }
 
-HEX_DEBUG_LOG("Next PC = " TARGET_FMT_lx "\n", env->next_PC);
+HEX_DEBUG_LOG("Next PC = " TARGET_FMT_lx "\n", env->gpr[HEX_REG_PC]);
 HEX_DEBUG_LOG("Exec counters: pkt = " TARGET_FMT_lx
   ", insn = " TARGET_FMT_lx
   ", hvx = " TARGET_FMT_lx "\n",
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index fd4f0efa26..71ad2da682 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -31,7 +31,6 @@
 
 TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
 TCGv hex_pred[NUM_PREGS];
-TCGv hex_next_PC;
 TCGv hex_this_PC;
 TCGv hex_slot_cancelled;
 TCGv hex_branch_taken;
@@ -120,7 +119,6 @@ static void gen_exec_counters(DisasContext *ctx)
 static void gen_end_tb(DisasContext *ctx)
 {
 gen_exec_counters(ctx);
-tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC);
 tcg_gen_exit_tb(NULL, 0);
 ctx->base.is_jmp = DISAS_NORETURN;
 }
@@ -128,7 +126,7 @@ static void gen_end_tb(DisasContext *ctx)
 static void gen_exception_end_tb(DisasContext *ctx, int excp)
 {
 gen_exec_counters(ctx);
-tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC);
+tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC);
 gen_exception_raw(excp);
 ctx->base.is_jmp = DISAS_NORETURN;
 
@@ -204,12 +202,29 @@ static bool need_pred_written(Packet *pkt)
 return check_for_attrib(pkt, A_WRITES_PRED_REG);
 }
 
+static bool need_next_PC(Packet *pkt)
+{
+/* Check for conditional control flow or HW loop end */
+for (int i = 0; i < 

[PATCH 2/8] Hexagon (target/hexagon) Remove PC from the runtime state

2022-10-19 Thread Taylor Simpson
Add pc field to Packet structure
For helpers that need PC, pass an extra argument
Remove slot arg from conditional jump helpers
On a trap0, copy pkt->pc into hex_gpr[HEX_REG_PC]

Signed-off-by: Taylor Simpson 
---
 target/hexagon/gen_tcg.h| 7 +++
 target/hexagon/insn.h   | 1 +
 target/hexagon/macros.h | 2 +-
 target/hexagon/translate.c  | 9 +
 target/hexagon/gen_helper_funcs.py  | 4 
 target/hexagon/gen_helper_protos.py | 3 +++
 target/hexagon/gen_tcg_funcs.py | 3 +++
 target/hexagon/hex_common.py| 6 +-
 8 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 50634ac459..7f0ba27eb6 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -742,4 +742,11 @@
 RsV = RsV; \
 } while (0)
 
+#define fGEN_TCG_J2_trap0(SHORTCODE) \
+do { \
+uiV = uiV; \
+tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], pkt->pc); \
+TCGv excp = tcg_constant_tl(HEX_EXCP_TRAP0); \
+gen_helper_raise_exception(cpu_env, excp); \
+} while (0)
 #endif
diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h
index 857a7ceb75..b3260d1f0b 100644
--- a/target/hexagon/insn.h
+++ b/target/hexagon/insn.h
@@ -57,6 +57,7 @@ typedef struct Instruction Insn;
 struct Packet {
 uint16_t num_insns;
 uint16_t encod_pkt_size_in_bytes;
+uint32_t pc;
 
 /* Pre-decodes about COF */
 bool pkt_has_cof;  /* Has any change-of-flow */
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index e908405d82..469dfa5571 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -398,7 +398,7 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
 #else
 #define fREAD_GP() READ_REG(HEX_REG_GP)
 #endif
-#define fREAD_PC() (READ_REG(HEX_REG_PC))
+#define fREAD_PC() (PC)
 
 #define fREAD_NPC() (env->next_PC & (0xfffe))
 
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 2e46cc0680..fd4f0efa26 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -194,11 +194,6 @@ static bool check_for_attrib(Packet *pkt, int attrib)
 return false;
 }
 
-static bool need_pc(Packet *pkt)
-{
-return check_for_attrib(pkt, A_IMPLICIT_READS_PC);
-}
-
 static bool need_slot_cancelled(Packet *pkt)
 {
 return check_for_attrib(pkt, A_CONDEXEC);
@@ -240,9 +235,6 @@ static void gen_start_packet(DisasContext *ctx, Packet *pkt)
 }
 
 /* Initialize the runtime state for packet semantics */
-if (need_pc(pkt)) {
-tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
-}
 if (need_slot_cancelled(pkt)) {
 tcg_gen_movi_tl(hex_slot_cancelled, 0);
 }
@@ -768,6 +760,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
 }
 
 if (decode_packet(nwords, words, , false) > 0) {
+pkt.pc = ctx->base.pc_next;
 HEX_DEBUG_PRINT_PKT();
 gen_start_packet(ctx, );
 for (i = 0; i < pkt.num_insns; i++) {
diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py
index f7c1a82e9f..8ab144b20a 100755
--- a/target/hexagon/gen_helper_funcs.py
+++ b/target/hexagon/gen_helper_funcs.py
@@ -241,6 +241,10 @@ def gen_helper_function(f, tag, tagregs, tagimms):
 if (hex_common.need_pkt_has_multi_cof(tag)):
 f.write(", uint32_t pkt_has_multi_cof")
 
+if hex_common.need_PC(tag):
+if i > 0: f.write(", ")
+f.write("target_ulong PC")
+i += 1
 if hex_common.need_slot(tag):
 if i > 0: f.write(", ")
 f.write("uint32_t slot")
diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py
index 4530d7ba8d..2385717dda 100755
--- a/target/hexagon/gen_helper_protos.py
+++ b/target/hexagon/gen_helper_protos.py
@@ -85,6 +85,7 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
 if hex_common.need_pkt_has_multi_cof(tag): def_helper_size += 1
 if hex_common.need_part1(tag): def_helper_size += 1
 if hex_common.need_slot(tag): def_helper_size += 1
+if hex_common.need_PC(tag): def_helper_size += 1
 f.write('DEF_HELPER_%s(%s' % (def_helper_size, tag))
 ## The return type is void
 f.write(', void' )
@@ -93,6 +94,7 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
 if hex_common.need_pkt_has_multi_cof(tag): def_helper_size += 1
 if hex_common.need_part1(tag): def_helper_size += 1
 if hex_common.need_slot(tag): def_helper_size += 1
+if hex_common.need_PC(tag): def_helper_size += 1
 f.write('DEF_HELPER_%s(%s' % (def_helper_size, tag))
 
 ## Generate the qemu DEF_HELPER type for each result
@@ -131,6 +133,7 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
 ## Add the arguments for the instruction 

Re: [PATCH v3 00/29] PowerPC interrupt rework

2022-10-19 Thread Daniel Henrique Barboza

Matheus,

This series fails 'make check-avocado' in an e500 test. This is the error 
output:


& make -j && \
make check-avocado 
AVOCADO_TESTS=tests/avocado/replay_kernel.py:ReplayKernelNormal.test_ppc64_e500

(...)

Fetching asset from 
tests/avocado/replay_kernel.py:ReplayKernelNormal.test_ppc64_e500
JOB ID : 506b6b07bf40cf1bffcf3911a0f9b8948de6553c
JOB LOG: 
/home/danielhb/qemu/build/tests/results/job-2022-10-19T17.37-506b6b0/job.log
 (1/1) tests/avocado/replay_kernel.py:ReplayKernelNormal.test_ppc64_e500: 
INTERRUPTED: Test interrupted by SIGTERM\nRunner error occurred: Timeout 
reached\nOriginal status: ERROR\n{'name': 
'1-tests/avocado/replay_kernel.py:ReplayKernelNormal.test_ppc64_e500', 
'logdir': 
'/home/danielhb/qemu/build/tests/results/job-2022-10-19T17.37-506b6b0/test-... 
(120.31 s)
RESULTS: PASS 0 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 1 | CANCEL 0
JOB TIME   : 121.00 s



'git bisect' pointed the following commit as the culprit:

d9bdb6192edc5c74cda754a6cd32237b1b9272f0 is the first bad commit
commit d9bdb6192edc5c74cda754a6cd32237b1b9272f0
Author: Matheus Ferst 
Date:   Tue Oct 11 17:48:27 2022 -0300

target/ppc: introduce ppc_maybe_interrupt



This would be patch 27.


As a benchmark, this test when successful takes around 11 seconds in my test
env:

 (33/42) tests/avocado/replay_kernel.py:ReplayKernelNormal.test_ppc64_e500: 
PASS (11.02 s)


Cedric's qemu-ppc-boot test suite works fine with this series, so I'd say that
this avocado test is doing something else that is causing the problem.


I'll test patches 1-26 later and see if all tests pass. In that case I'll push
1-26 to ppc-next and then you can work on 27-29.


Thanks,


Daniel



On 10/11/22 17:48, Matheus Ferst wrote:

Link to v2: https://lists.gnu.org/archive/html/qemu-ppc/2022-09/msg00556.html
This series is also available as a git branch: 
https://github.com/PPC64/qemu/tree/ferst-interrupt-fix-v3
Patches without review: 3-27

This new version rebases the patch series on the current master and
fixes some problems pointed out by Fabiano on v2.

Matheus Ferst (29):
   target/ppc: define PPC_INTERRUPT_* values directly
   target/ppc: always use ppc_set_irq to set env->pending_interrupts
   target/ppc: split interrupt masking and delivery from ppc_hw_interrupt
   target/ppc: prepare to split interrupt masking and delivery by excp_model
   target/ppc: create an interrupt masking method for POWER9/POWER10
   target/ppc: remove unused interrupts from p9_next_unmasked_interrupt
   target/ppc: create an interrupt deliver method for POWER9/POWER10
   target/ppc: remove unused interrupts from p9_deliver_interrupt
   target/ppc: remove generic architecture checks from p9_deliver_interrupt
   target/ppc: move power-saving interrupt masking out of cpu_has_work_POWER9
   target/ppc: add power-saving interrupt masking logic to 
p9_next_unmasked_interrupt
   target/ppc: create an interrupt masking method for POWER8
   target/ppc: remove unused interrupts from p8_next_unmasked_interrupt
   target/ppc: create an interrupt deliver method for POWER8
   target/ppc: remove unused interrupts from p8_deliver_interrupt
   target/ppc: remove generic architecture checks from p8_deliver_interrupt
   target/ppc: move power-saving interrupt masking out of cpu_has_work_POWER8
   target/ppc: add power-saving interrupt masking logic to 
p8_next_unmasked_interrupt
   target/ppc: create an interrupt masking method for POWER7
   target/ppc: remove unused interrupts from p7_next_unmasked_interrupt
   target/ppc: create an interrupt deliver method for POWER7
   target/ppc: remove unused interrupts from p7_deliver_interrupt
   target/ppc: remove generic architecture checks from p7_deliver_interrupt
   target/ppc: move power-saving interrupt masking out of cpu_has_work_POWER7
   target/ppc: add power-saving interrupt masking logic to 
p7_next_unmasked_interrupt
   target/ppc: remove ppc_store_lpcr from CONFIG_USER_ONLY builds
   target/ppc: introduce ppc_maybe_interrupt
   target/ppc: unify cpu->has_work based on cs->interrupt_request
   target/ppc: move the p*_interrupt_powersave methods to excp_helper.c

  hw/ppc/pnv_core.c|   1 +
  hw/ppc/ppc.c |  17 +-
  hw/ppc/spapr_hcall.c |   6 +
  hw/ppc/spapr_rtas.c  |   2 +-
  hw/ppc/trace-events  |   2 +-
  target/ppc/cpu.c |   4 +
  target/ppc/cpu.h |  43 +-
  target/ppc/cpu_init.c| 212 +-
  target/ppc/excp_helper.c | 887 ++-
  target/ppc/helper.h  |   1 +
  target/ppc/helper_regs.c |   2 +
  target/ppc/misc_helper.c |  11 +-
  target/ppc/translate.c   |   2 +
  13 files changed, 833 insertions(+), 357 deletions(-)





Re: [PATCH] target/riscv/pmp: fix non-translated page size address checks w/ MPU

2022-10-19 Thread Leon Schuermann
Alistair Francis  writes:
>> @@ -310,10 +311,17 @@ bool pmp_hart_has_privs(CPURISCVState *env, 
>> target_ulong addr,
>>  }
>>
>>  if (size == 0) {
>> -if (riscv_feature(env, RISCV_FEATURE_MMU)) {
>> +if (riscv_cpu_mxl(env) == MXL_RV32) {
>> +satp_mode = SATP32_MODE;
>> +} else {
>> +satp_mode = SATP64_MODE;
>> +}
>> +
>> +if (riscv_feature(env, RISCV_FEATURE_MMU)
>> +&& get_field(env->satp, satp_mode)) {
>>  /*
>> - * If size is unknown (0), assume that all bytes
>> - * from addr to the end of the page will be accessed.
>> + * If size is unknown (0) and virtual memory is enabled, assume 
>> that
>> + * all bytes from addr to the end of the page will be accessed.
>>   */
>>  pmp_size = -(addr | TARGET_PAGE_MASK);
>
> I'm not sure if we need this at all.
>
> This function is only called from get_physical_address_pmp() which
> then calculates the maximum size using pmp_is_range_in_tlb().

I'm by no means an expert on QEMU and the TCG, so I've spun up GDB to
trace down why exactly this function is called with a `size = 0`
argument. It seems that there are, generally, two code paths to this
function for instruction fetching:

1. From `get_page_addr_code`: this will invoke `tlb_fill` with
   `size = 0` to check whether an entire page is accessible and can be
   translated given the current MMU / PMP configuration. In my
   particular example, it may rightfully fail then. `get_page_addr_code`
   can handle this and will subsequently cause an MMU protection check
   to be run for each instruction translated.

2. From `riscv_tr_translate_insn` through `cpu_lduw_code`, which will
   execute `tlb_fill` with `size = 2` (to try and decode a compressed
   instruction), assuming that the above check failed.

So far, so good. In this context, it actually makes sense for
`pmp_hart_has_privs` to interpret `size = 0` to mean whether the entire
page is allowed to be accessed.
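
(To make that concrete: with 4 KiB pages and an address like 0x80000123,
the existing `pmp_size = -(addr | TARGET_PAGE_MASK)` works out to
0x1000 - 0x123 = 0xEDD bytes, i.e. everything from addr up to the end of
the page. The address is only an illustrative value.)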

> I suspect that we could just use sizeof(target_ulong) as the fallback
> for every time size == 0. Then pmp_is_range_in_tlb() will set the
> tlb_size to the maximum possible size of the PMP region.

Given the above, I don't think that this is correct either. The PMP
check would pass even for non-page sized regions, but the entire page
would be accessible through the TCG's TLB, as a consequence of
`get_page_addr_code`.
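
For reference, that suggested fallback would amount to roughly the
following in `pmp_hart_has_privs` (only a sketch of the idea, untested):

    if (size == 0) {
        /* Assume a natural-width access instead of "rest of the page" */
        pmp_size = sizeof(target_ulong);
    } else {
        pmp_size = size;
    }

A sub-page PMP region could then satisfy the check, yet the `size = 0`
caller (`get_page_addr_code`) still ends up mapping the whole page into
the TLB.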


In the current implementation, `get_page_addr_code_hostp` calls
`tlb_fill`, which in turn invokes the RISC-V TCG op `tlb_fill` with the
parameter `probe = false`. This in turn raises a PMP exception in the
CPU, whereas `get_page_addr_code` would seem to expect this a failing
`tlb_fill` to be side-effect free, such that the MMU protection checks
can be re-run per instruction in the TCG code generation phase.

I think that this is sufficient evidence to conclude that my initial
patch is actually incorrect; however, I am unsure how this issue
can be solved properly. One approach which seems to work is to change
`get_page_addr_code_hostp` to use a non-faulting page-table read
instead:

@@ -1510,11 +1510,15 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
 uintptr_t mmu_idx = cpu_mmu_index(env, true);
 uintptr_t index = tlb_index(env, mmu_idx, addr);
 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+CPUState *cs = env_cpu(env);
+CPUClass *cc = CPU_GET_CLASS(cs);
 void *p;
 
 if (unlikely(!tlb_hit(entry->addr_code, addr))) {
 if (!VICTIM_TLB_HIT(addr_code, addr)) {
-tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
+// Nonfaulting page-table read:
+cc->tcg_ops->tlb_fill(cs, addr, 0, MMU_INST_FETCH, mmu_idx, true,
+  0);
 index = tlb_index(env, mmu_idx, addr);
 entry = tlb_entry(env, mmu_idx, addr);
 

However, given this touches the generic TCG implementation, I cannot
judge whether this is correct or has any unintended side effects for
other targets. If this is correct, I'd be happy to send a proper patch.

-Leon



Re: [PATCH 4/6] target/ppc: move msgclr/msgsnd to decodetree

2022-10-19 Thread Daniel Henrique Barboza




On 10/6/22 17:06, Matheus Ferst wrote:

Signed-off-by: Matheus Ferst 
---


Reviewed-by: Daniel Henrique Barboza 


  target/ppc/insn32.decode  |  5 ++
  target/ppc/translate.c| 34 +
  .../ppc/translate/processor-ctrl-impl.c.inc   | 70 +++
  3 files changed, 77 insertions(+), 32 deletions(-)
  create mode 100644 target/ppc/translate/processor-ctrl-impl.c.inc

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index a5249ee32c..bba49ded1b 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -908,3 +908,8 @@ SLBSYNC 01 - - - 0101010010 -
  
  TLBIE   01 . - .. . . . 0100110010 -@X_tlbie

  TLBIEL  01 . - .. . . . 0100010010 -@X_tlbie
+
+# Processor Control Instructions
+
+MSGCLR  01 - - . 0011101110 -   @X_rb
+MSGSND  01 - - . 0011001110 -   @X_rb
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 435066c4a3..889cca6325 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6241,34 +6241,6 @@ static void gen_icbt_440(DisasContext *ctx)
  
  /* Embedded.Processor Control */
  
-static void gen_msgclr(DisasContext *ctx)

-{
-#if defined(CONFIG_USER_ONLY)
-GEN_PRIV(ctx);
-#else
-CHK_HV(ctx);
-if (is_book3s_arch2x(ctx)) {
-gen_helper_book3s_msgclr(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-} else {
-gen_helper_msgclr(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-}
-#endif /* defined(CONFIG_USER_ONLY) */
-}
-
-static void gen_msgsnd(DisasContext *ctx)
-{
-#if defined(CONFIG_USER_ONLY)
-GEN_PRIV(ctx);
-#else
-CHK_HV(ctx);
-if (is_book3s_arch2x(ctx)) {
-gen_helper_book3s_msgsnd(cpu_gpr[rB(ctx->opcode)]);
-} else {
-gen_helper_msgsnd(cpu_gpr[rB(ctx->opcode)]);
-}
-#endif /* defined(CONFIG_USER_ONLY) */
-}
-
  #if defined(TARGET_PPC64)
  static void gen_msgclrp(DisasContext *ctx)
  {
@@ -6628,6 +6600,8 @@ static bool resolve_PLS_D(DisasContext *ctx, arg_D *d, arg_PLS_D *a)
  
  #include "translate/branch-impl.c.inc"
  
+#include "translate/processor-ctrl-impl.c.inc"

+
  #include "translate/storage-ctrl-impl.c.inc"
  
  /* Handles lfdp */

@@ -6901,10 +6875,6 @@ GEN_HANDLER2_E(tlbivax_booke206, "tlbivax", 0x1F, 0x12, 0x18, 0x0001,
 PPC_NONE, PPC2_BOOKE206),
  GEN_HANDLER2_E(tlbilx_booke206, "tlbilx", 0x1F, 0x12, 0x00, 0x0381,
 PPC_NONE, PPC2_BOOKE206),
-GEN_HANDLER2_E(msgsnd, "msgsnd", 0x1F, 0x0E, 0x06, 0x03ff0001,
-   PPC_NONE, (PPC2_PRCNTL | PPC2_ISA207S)),
-GEN_HANDLER2_E(msgclr, "msgclr", 0x1F, 0x0E, 0x07, 0x03ff0001,
-   PPC_NONE, (PPC2_PRCNTL | PPC2_ISA207S)),
  GEN_HANDLER2_E(msgsync, "msgsync", 0x1F, 0x16, 0x1B, 0x,
 PPC_NONE, PPC2_ISA300),
  GEN_HANDLER(wrtee, 0x1F, 0x03, 0x04, 0x000FFC01, PPC_WRTEE),
diff --git a/target/ppc/translate/processor-ctrl-impl.c.inc b/target/ppc/translate/processor-ctrl-impl.c.inc
new file mode 100644
index 00..0192b45c8f
--- /dev/null
+++ b/target/ppc/translate/processor-ctrl-impl.c.inc
@@ -0,0 +1,70 @@
+/*
+ * Power ISA decode for Storage Control instructions
+ *
+ * Copyright (c) 2022 Instituto de Pesquisas Eldorado (eldorado.org.br)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+/*
+ * Processor Control Instructions
+ */
+
+static bool trans_MSGCLR(DisasContext *ctx, arg_X_rb *a)
+{
+if (!(ctx->insns_flags2 & PPC2_ISA207S)) {
+/*
+ * Before Power ISA 2.07, processor control instructions were only
+ * implemented in the "Embedded.Processor Control" category.
+ */
+REQUIRE_INSNS_FLAGS2(ctx, PRCNTL);
+}
+
+REQUIRE_HV(ctx);
+
+#if !defined(CONFIG_USER_ONLY)
+if (is_book3s_arch2x(ctx)) {
+gen_helper_book3s_msgclr(cpu_env, cpu_gpr[a->rb]);
+} else {
+gen_helper_msgclr(cpu_env, cpu_gpr[a->rb]);
+}
+#else
+qemu_build_not_reached();
+#endif
+return true;
+}
+
+static bool trans_MSGSND(DisasContext *ctx, arg_X_rb *a)
+{
+if (!(ctx->insns_flags2 & PPC2_ISA207S)) {
+/*
+ * Before Power ISA 2.07, processor control instructions were only
+ * implemented in the "Embedded.Processor Control" category.
+ */
+

Re: [PATCH 6/6] target/ppc: move msgsync to decodetree

2022-10-19 Thread Daniel Henrique Barboza




On 10/6/22 17:06, Matheus Ferst wrote:

Signed-off-by: Matheus Ferst 
---


Reviewed-by: Daniel Henrique Barboza 


  target/ppc/insn32.decode   |  1 +
  target/ppc/translate.c | 14 --
  target/ppc/translate/processor-ctrl-impl.c.inc |  9 +
  3 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 5ba4a6807d..70a3b4de5e 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -915,3 +915,4 @@ MSGCLR  01 - - . 0011101110 -   @X_rb
  MSGSND  01 - - . 0011001110 -   @X_rb
  MSGCLRP 01 - - . 0010101110 -   @X_rb
  MSGSNDP 01 - - . 0010001110 -   @X_rb
+MSGSYNC 01 - - - 1101110110 -
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 087ab8e69d..f092bbeb8b 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6239,18 +6239,6 @@ static void gen_icbt_440(DisasContext *ctx)
   */
  }
  
-/* Embedded.Processor Control */

-
-static void gen_msgsync(DisasContext *ctx)
-{
-#if defined(CONFIG_USER_ONLY)
-GEN_PRIV(ctx);
-#else
-CHK_HV(ctx);
-#endif /* defined(CONFIG_USER_ONLY) */
-/* interpreted as no-op */
-}
-
  #if defined(TARGET_PPC64)
  static void gen_maddld(DisasContext *ctx)
  {
@@ -6853,8 +6841,6 @@ GEN_HANDLER2_E(tlbivax_booke206, "tlbivax", 0x1F, 0x12, 0x18, 0x0001,
 PPC_NONE, PPC2_BOOKE206),
  GEN_HANDLER2_E(tlbilx_booke206, "tlbilx", 0x1F, 0x12, 0x00, 0x0381,
 PPC_NONE, PPC2_BOOKE206),
-GEN_HANDLER2_E(msgsync, "msgsync", 0x1F, 0x16, 0x1B, 0x,
-   PPC_NONE, PPC2_ISA300),
  GEN_HANDLER(wrtee, 0x1F, 0x03, 0x04, 0x000FFC01, PPC_WRTEE),
  GEN_HANDLER(wrteei, 0x1F, 0x03, 0x05, 0x000E7C01, PPC_WRTEE),
  GEN_HANDLER(dlmzb, 0x1F, 0x0E, 0x02, 0x, PPC_440_SPEC),
diff --git a/target/ppc/translate/processor-ctrl-impl.c.inc b/target/ppc/translate/processor-ctrl-impl.c.inc
index 3703001f31..021e365a57 100644
--- a/target/ppc/translate/processor-ctrl-impl.c.inc
+++ b/target/ppc/translate/processor-ctrl-impl.c.inc
@@ -92,3 +92,12 @@ static bool trans_MSGSNDP(DisasContext *ctx, arg_X_rb *a)
  #endif
  return true;
  }
+
+static bool trans_MSGSYNC(DisasContext *ctx, arg_MSGSYNC *a)
+{
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_HV(ctx);
+
+/* interpreted as no-op */
+return true;
+}




Re: [PATCH 5/6] target/ppc: move msgclrp/msgsndp to decodetree

2022-10-19 Thread Daniel Henrique Barboza

Matheus,

This patch fails ppc-softmmu emulation:


FAILED: libqemu-ppc-softmmu.fa.p/target_ppc_translate.c.o
cc -m64 -mcx16 -Ilibqemu-ppc-softmmu.fa.p -I. -I.. -Itarget/ppc -I../target/ppc -I../dtc/libfdt 
-Iqapi -Itrace -Iui -Iui/shader -I/usr/include/pixman-1 -I/usr/include/glib-2.0 
-I/usr/lib64/glib-2.0/include -I/usr/include/sysprof-4 -fdiagnostics-color=auto -Wall -Winvalid-pch 
-Werror -std=gnu11 -O2 -g -isystem /home/danielhb/qemu/linux-headers -isystem linux-headers -iquote 
. -iquote /home/danielhb/qemu -iquote /home/danielhb/qemu/include -iquote 
/home/danielhb/qemu/tcg/i386 -pthread -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE 
-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wundef 
-Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fno-common -fwrapv 
-Wold-style-declaration -Wold-style-definition -Wtype-limits -Wformat-security -Wformat-y2k 
-Winit-self -Wignored-qualifiers -Wempty-body -Wnested-externs -Wendif-labels 
-Wexpansion-to-defined -Wimplicit-fallthrough=2 -Wno-missing-include-dirs -Wno-shift-negative-value 
-Wno-psabi -fstack-protector-strong -fPIE -isystem../linux-headers -isystemlinux-headers 
-DNEED_CPU_H '-DCONFIG_TARGET="ppc-softmmu-config-target.h"' 
'-DCONFIG_DEVICES="ppc-softmmu-config-devices.h"' -MD -MQ 
libqemu-ppc-softmmu.fa.p/target_ppc_translate.c.o -MF 
libqemu-ppc-softmmu.fa.p/target_ppc_translate.c.o.d -o 
libqemu-ppc-softmmu.fa.p/target_ppc_translate.c.o -c ../target/ppc/translate.c
In file included from ../target/ppc/translate.c:21:
In function ‘trans_MSGCLRP’,
inlined from ‘decode_insn32’ at 
libqemu-ppc-softmmu.fa.p/decode-insn32.c.inc:3250:21,
inlined from ‘ppc_tr_translate_insn’ at ../target/ppc/translate.c:7552:15:
/home/danielhb/qemu/include/qemu/osdep.h:184:35: error: call to 
‘qemu_build_not_reached_always’ declared with attribute error: code path is 
reachable
  184 | #define qemu_build_not_reached()  qemu_build_not_reached_always()
  |   ^~~
../target/ppc/translate/processor-ctrl-impl.c.inc:79:5: note: in expansion of 
macro ‘qemu_build_not_reached’
   79 | qemu_build_not_reached();
  | ^~

The error is down there:




On 10/6/22 17:06, Matheus Ferst wrote:

Signed-off-by: Matheus Ferst 
---
  target/ppc/insn32.decode  |  2 ++
  target/ppc/translate.c| 26 ---
  .../ppc/translate/processor-ctrl-impl.c.inc   | 24 +
  3 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index bba49ded1b..5ba4a6807d 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -913,3 +913,5 @@ TLBIEL  01 . - .. . . . 0100010010 -@X_tlbie
  
  MSGCLR  01 - - . 0011101110 -   @X_rb

  MSGSND  01 - - . 0011001110 -   @X_rb
+MSGCLRP 01 - - . 0010101110 -   @X_rb
+MSGSNDP 01 - - . 0010001110 -   @X_rb
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 889cca6325..087ab8e69d 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6241,28 +6241,6 @@ static void gen_icbt_440(DisasContext *ctx)
  
  /* Embedded.Processor Control */
  
-#if defined(TARGET_PPC64)

-static void gen_msgclrp(DisasContext *ctx)
-{
-#if defined(CONFIG_USER_ONLY)
-GEN_PRIV(ctx);
-#else
-CHK_SV(ctx);
-gen_helper_book3s_msgclrp(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-#endif /* defined(CONFIG_USER_ONLY) */
-}
-
-static void gen_msgsndp(DisasContext *ctx)
-{
-#if defined(CONFIG_USER_ONLY)
-GEN_PRIV(ctx);
-#else
-CHK_SV(ctx);
-gen_helper_book3s_msgsndp(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-#endif /* defined(CONFIG_USER_ONLY) */
-}
-#endif
-
  static void gen_msgsync(DisasContext *ctx)
  {
  #if defined(CONFIG_USER_ONLY)
@@ -6896,10 +6874,6 @@ GEN_HANDLER(vmladduhm, 0x04, 0x11, 0xFF, 0x, PPC_ALTIVEC),
  GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x, PPC_NONE,
PPC2_ISA300),
  GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER2_E(msgsndp, "msgsndp", 0x1F, 0x0E, 0x04, 0x03ff0001,
-   PPC_NONE, PPC2_ISA207S),
-GEN_HANDLER2_E(msgclrp, "msgclrp", 0x1F, 0x0E, 0x05, 0x03ff0001,
-   PPC_NONE, PPC2_ISA207S),
  #endif
  
  #undef GEN_INT_ARITH_ADD

diff --git a/target/ppc/translate/processor-ctrl-impl.c.inc b/target/ppc/translate/processor-ctrl-impl.c.inc
index 0192b45c8f..3703001f31 100644
--- a/target/ppc/translate/processor-ctrl-impl.c.inc
+++ b/target/ppc/translate/processor-ctrl-impl.c.inc
@@ -68,3 +68,27 @@ static bool trans_MSGSND(DisasContext *ctx, arg_X_rb *a)
  #endif
  return true;
  }
+
+static bool trans_MSGCLRP(DisasContext *ctx, arg_X_rb *a)
+{
+REQUIRE_INSNS_FLAGS2(ctx, ISA207S);
+REQUIRE_SV(ctx);
+#if 

Re: has anybody looked at the long-standing intermittent raspi2 avocado test failure?

2022-10-19 Thread John Snow
On Tue, Oct 18, 2022 at 6:23 AM Peter Maydell  wrote:
>
> If you run this single avocado test:
>
> while ./build/x86/tests/venv/bin/avocado run
> build/x86/tests/avocado/boot_linux_console.py:BootLinuxConsole.test_arm_raspi2_initrd
> ; do true; done
>
> then on my machine it will fail within 4 or 5 iterations. Has anybody
> looked into this? Looking at a log from a failed test
> https://people.linaro.org/~peter.maydell/raspi-avocado-fail.log
> what seems to happen is that the test sends the "halt" command to
> the guest. The DEBUG log reports a transition from RUNNING to
> DISCONNECTING and the QMP connection disconnects, so presumably
> the guest really did exit -- or is that something the test framework
> is doing from its end?

The avocado test calls self.vm.wait(), which goes down this call
chain, all in QEMUMachine (machine.py):

wait()
.shutdown()
.._do_shutdown()
..._soft_shutdown()
...._early_cleanup()
...._close_qmp_connection()

QEMUMachine._early_cleanup, as you've noticed, closes the console
socket right away. It has this to say on the matter:

# If we keep the console socket open, we may deadlock waiting
# for QEMU to exit, while QEMU is waiting for the socket to
# become writable.

If we leave it open, QEMU is not guaranteed to close, and the
synchronous code in machine.py isn't good enough to poll both. This
was one motivation for switching the QMP library to asyncio, but those
changes haven't hit machine.py yet. I am hoping to have it done soon,
but some other tasks of finalizing splitting out the QMP library
itself took priority in my time budget before returning to converting
more utilities to asyncio.

Next, the QMP connection itself is closed, which is that transition
from RUNNING to DISCONNECTING you see from the "protocol" module in
Avocado's log.
(Avocado is truncating the full module names here; the real source
module names are "qemu.qmp.protocol", "qemu.qmp.qmp_client", and
"qemu.machine.machine".)

>Anyway, the next thing that happens is
> that after a 30s timeout something (the avocado framework?) sends
> the QEMU process a SIGTERM, and there's a bunch of timeout related
> python backtraces in the log.

It looks like the subprocess wait in _soft_shutdown never returns. We
assume that, because wait() was called, the caller is expecting the
VM to exit and has presumably already issued a "quit" or "halt" or
equivalent, so the Machine code does not issue any further commands to
ensure this happens.

At this point, my guess is that -- based on your later issue filings
-- the "halt" command isn't actually getting processed because of the
rapid console disconnect, so the machine.py code deadlocks waiting for
QEMU to exit when it never will.

>
> In comparison, with a pass
> https://people.linaro.org/~peter.maydell/raspi-avocado-pass.log
> the transition from DISCONNECTING to IDLE is followed within about
> 5 seconds by a test pass.
>
> So:
>
> (1) has anybody looked at what's happening in this test in
> particular?
> (2) is it possible for avocado to log all of the communication
> on the serial console? Currently the logging of what the guest
> prints cuts out early (in both success and failure cases),
> and what the test case sends to the guest is not logged at all

The output logging being truncated is due to other problems as you've
noticed in later issue filings.

I think that input logging could be added by overriding
ConsoleSocket.send() in console_socket.py; see
https://docs.python.org/3/library/socket.html#socket.socket.send for
the signature, and use _drain_socket and/or recv() as a reference.
You might want to not write to the same logfile to avoid an
unreadable, interleaved mess. The data buffers here are, I think, not
line-based and may not produce coherent interleaved output.

Naive and untested:
```
def send(self, data: bytes, flags: int = 0) -> int:
    self._some_other_logfile.write(data)
    self._some_other_logfile.flush()
    return super().send(data, flags)
```

I am working on replacing machine.py with an async version which
should solve some of the socket problems; but if you want a band-aid
in the meantime I'm happy to take any patches to improve what we have
in the short-term. I will probably focus my own efforts on just
getting the rewrite in.

> (3) what's the best way to debug this to figure out what's
> actually happening?
>
> I tried passing timeout=None to the self.vm.wait() call in
> test_arm_raspi2_initrd(), which the comments in machine.py
> is supposed to mean "wait forever", but that didn't actually
> seem to change the timeout. (I had the idea of looking at
> the status of the QEMU process etc at that point.)

Hm ... The stack trace for the TimeoutExpired exception sure makes it
look like the Machine appliance is hitting its own timeout (Should be
at about 30 seconds -- matches what you observed), but if that's the
case, passing timeout=None should have alleviated that -- up until you
hit the Avocado 

Re: [PATCH v5 6/6] hw/arm/virt: Add 'compact-highmem' property

2022-10-19 Thread Eric Auger
Hi Gavin,

On 10/12/22 01:18, Gavin Shan wrote:
> After the improvement to high memory region address assignment is
> applied, the memory layout can be changed, introducing possible
> migration breakage. For example, VIRT_HIGH_PCIE_MMIO memory region
> is disabled or enabled when the optimization is applied or not, with
> the following configuration.
>
>   pa_bits  = 40;
>   vms->highmem_redists = false;
>   vms->highmem_ecam= false;
>   vms->highmem_mmio= true;
>
>   # qemu-system-aarch64 -accel kvm -cpu host\
> -machine virt-7.2,compact-highmem={on, off} \
> -m 4G,maxmem=511G -monitor stdio
>
>   Region            compact-highmem=off        compact-highmem=on
>   ------------------------------------------------------------------
>   RAM               [1GB 512GB]                [1GB 512GB]
>   HIGH_GIC_REDISTS  [512GB   512GB+64MB]       [disabled]
>   HIGH_PCIE_ECAM    [512GB+256MB 512GB+512MB]  [disabled]
>   HIGH_PCIE_MMIO    [disabled]                 [512GB   1TB]
>
> In order to keep backwards compatibility, we need to disable the
> optimization on machines, which is virt-7.1 or earlier than it. It
> means the optimization is enabled by default from virt-7.2. Besides,
> 'compact-highmem' property is added so that the optimization can be
> explicitly enabled or disabled on all machine types by users.
>
> Signed-off-by: Gavin Shan 
> Tested-by: Zhenyu Zhang 
> ---
>  docs/system/arm/virt.rst |  4 
>  hw/arm/virt.c| 47 
>  include/hw/arm/virt.h|  1 +
>  3 files changed, 52 insertions(+)
>
> diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
> index 20442ea2c1..75bf5a4994 100644
> --- a/docs/system/arm/virt.rst
> +++ b/docs/system/arm/virt.rst
> @@ -94,6 +94,10 @@ highmem
>address space above 32 bits. The default is ``on`` for machine types
>later than ``virt-2.12``.
>  
> +compact-highmem
> +  Set ``on``/``off`` to enable/disable compact space for high memory regions.
> +  The default is ``on`` for machine types later than ``virt-7.2``
> +
>  gic-version
>Specify the version of the Generic Interrupt Controller (GIC) to provide.
>Valid values are:
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index c05cfb5314..8f1dba0ece 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -174,6 +174,27 @@ static const MemMapEntry base_memmap[] = {
>   * Note the extended_memmap is sized so that it eventually also includes the
>   * base_memmap entries (VIRT_HIGH_GIC_REDIST2 index is greater than the last
>   * index of base_memmap).
> + *
> + * The addresses assigned to these regions are affected by 'compact-highmem'
> + * property, which is to enable or disable the compact space in the Highmem
> + * IO regions. For example, VIRT_HIGH_PCIE_MMIO can be disabled or enabled
> + * depending on the property in the following scenario.
To me, you should rather explain here what the so-called "compact"
space is vs. the legacy highmem layout.

If I understand correctly, the example rather legitimates the use of a
compat option by showing how the layout can be affected by the option. I
would put that in the commit msg instead. Also, in your example I see
VIRT_HIGH_GIC_REDISTS is disabled, but the code does not disable the
region except if it does not fit within the PA. This does not match your
example. Also, the region is named VIRT_HIGH_GIC_REDIST2.

In v4, Marc also suggested to have individual options for each highmem
region.
https://lore.kernel.org/qemu-devel/0f8e6a58-0dde-fb80-6966-7bb32c4df...@redhat.com/

Have you considered that option?

Thanks

Eric
> + *
> + * pa_bits  = 40;
> + * vms->highmem_redists = false;
> + * vms->highmem_ecam= false;
> + * vms->highmem_mmio= true;
> + *
> + * # qemu-system-aarch64 -accel kvm -cpu host\
> + *   -machine virt-7.2,compact-highmem={on, off} \
> + *   -m 4G,maxmem=511G -monitor stdio
> + *
> + * Region            compact-highmem=off        compact-highmem=on
> + * ------------------------------------------------------------------
> + * RAM               [1GB 512GB]                [1GB 512GB]
> + * HIGH_GIC_REDISTS  [512GB   512GB+64MB]       [disabled]
> + * HIGH_PCIE_ECAM    [512GB+256GB 512GB+512MB]  [disabled]
> + * HIGH_PCIE_MMIO    [disabled]                 [512GB   1TB]
>   */
>  static MemMapEntry extended_memmap[] = {
>  /* Additional 64 MB redist region (can contain up to 512 redistributors) */
> @@ -2353,6 +2374,20 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp)
>  vms->highmem = value;
>  }
>  
> +static bool virt_get_compact_highmem(Object *obj, Error **errp)
> +{
> +VirtMachineState *vms = VIRT_MACHINE(obj);
> +
> +return vms->highmem_compact;
> +}
> +
> +static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
> +{
> +VirtMachineState *vms = VIRT_MACHINE(obj);
> +
> +vms->highmem_compact = value;
> +}
> +
>  static bool 

Re: [PATCH v5 5/6] hw/arm/virt: Improve high memory region address assignment

2022-10-19 Thread Eric Auger
Hi Gavin

On 10/12/22 01:18, Gavin Shan wrote:
> There are three high memory regions, which are VIRT_HIGH_REDIST2,
> VIRT_HIGH_PCIE_ECAM and VIRT_HIGH_PCIE_MMIO. Their base addresses
> are floating on highest RAM address. However, they can be disabled
> in several cases.
>
> (1) One specific high memory region is disabled by developer by
> toggling vms->highmem_{redists, ecam, mmio}.
I would replace the above sentence by

One specific high memory region is likely to be disabled by the code by 
toggling vms->highmem_{redists, ecam, mmio}:

>
> (2) VIRT_HIGH_PCIE_ECAM region is disabled on machine, which is
> 'virt-2.12' or earlier than it.
>
> (3) VIRT_HIGH_PCIE_ECAM region is disabled when firmware is loaded
> on 32-bits system.
>
> (4) One specific high memory region is disabled when it breaks the
> PA space limit.
>
> The current implementation of virt_set_memmap() isn't comprehensive
> because the space for one specific high memory region is always
> reserved from the PA space for case (1), (2) and (3).
I would suggest:
isn't optimized because the high memory region PA range is always
reserved whatever the actual state of the corresponding vms->highmem_* flag.

>  In the code,
> 'base' and 'vms->highest_gpa' are always increased for those three
> cases. It's unnecessary since the assigned space of the disabled
> high memory region won't be used afterwards.
>
> This improves the address assignment for those three high memory
s/This improves/Improve
> region by skipping the address assignment for one specific high
> memory region if it has been disabled in case (1), (2) and (3).
> 'vms->high_compact' is false for now, meaning that we don't have
s/high_compact/highmem_compact

You may also justify the introduction of this new field.
> any behavior changes until it becomes configurable through property
> 'compact-highmem' in next patch.
>
> Signed-off-by: Gavin Shan 
> Tested-by: Zhenyu Zhang 
> ---
>  hw/arm/virt.c | 23 +++
>  include/hw/arm/virt.h |  1 +
>  2 files changed, 16 insertions(+), 8 deletions(-)
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index ee98a8a3b6..c05cfb5314 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -1717,22 +1717,29 @@ static void virt_set_high_memmap(VirtMachineState 
> *vms,
>  region_base = ROUND_UP(base, extended_memmap[i].size);
>  region_size = extended_memmap[i].size;
>  
> -vms->memmap[i].base = region_base;
> -vms->memmap[i].size = region_size;
> -
>  /*
>   * Check each device to see if they fit in the PA space,
while we are at it, you can change s/they fit/it fits
> - * moving highest_gpa as we go.
> + * moving highest_gpa as we go. For compatibility, move
> + * highest_gpa for disabled fitting devices as well, if
> + * the compact layout has been disabled.
>   *
>   * For each device that doesn't fit, disable it.
>   */
>  fits = (region_base + region_size) <= BIT_ULL(pa_bits);
> -if (fits) {
> +if (*region_enabled && fits) {
> +vms->memmap[i].base = region_base;
> +vms->memmap[i].size = region_size;
>  vms->highest_gpa = region_base + region_size - 1;
> +base = region_base + region_size;
> +} else {
> +*region_enabled = false;
> +if (!vms->highmem_compact) {
> +base = region_base + region_size;
> +if (fits) {
> +vms->highest_gpa = region_base + region_size - 1;
> +}
> +}
>  }
> -
> -*region_enabled &= fits;
> -base = region_base + region_size;
>  }
>  }
This looks quite complicated to me. It is not obvious, for instance, that we
have the same code as before when highmem_compact is not set. Typically
vms->memmap[i].base/size are not always set as they previously were, and the
impact on the rest of the code must be double-checked.

Could this be rewritten in the following way (pseudocode, totally untested)?


static void fit_highmem_slot(VirtMachineState *vms, hwaddr *base, int i,
                             int pa_bits)
{
    bool *region_enabled = virt_get_high_memmap_enabled(vms, i);
    hwaddr region_base = ROUND_UP(*base, extended_memmap[i].size);
    hwaddr region_size = extended_memmap[i].size;
    bool fits = (region_base + region_size) <= BIT_ULL(pa_bits);

    *region_enabled &= fits;
    vms->memmap[i].base = region_base;
    vms->memmap[i].size = region_size;

    /*
     * The compact layout only allocates space for the region if this
     * latter is enabled & fits.
     */
    if (vms->highmem_compact && !*region_enabled) {
        return;
    }

    /* account for the region and update the base address/highest_gpa if needed */
    *base = region_base + region_size;
    if (fits) {
        vms->highest_gpa = *base - 1;
    }
}

static void virt_set_high_memmap(VirtMachineState *vms,
                                 hwaddr base, int pa_bits)
{
    int i;

    for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
        fit_highmem_slot(vms, &base, i, pa_bits);
    }
}

Re: [RFC PATCH] target/s390x: fake instruction loading when handling 'ex'

2022-10-19 Thread Philippe Mathieu-Daudé

On 19/10/22 13:35, Alex Bennée wrote:

The s390x EXecute instruction is a bit weird as we synthesise the
executed instruction from what we have stored in memory. When plugins
are enabled this breaks because we detect the ld_code2() loading from
a non-zero offset without the rest of the instruction being there.

Work around this with a special helper to inform the rest of the
translator about the instruction so things stay consistent.

Signed-off-by: Alex Bennée 
Cc: Richard Henderson 
---
  include/exec/translator.h| 17 +
  target/s390x/tcg/translate.c |  4 
  2 files changed, 21 insertions(+)

diff --git a/include/exec/translator.h b/include/exec/translator.h
index 3b77f5f4aa..156f568701 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -211,6 +211,23 @@ translator_ldq_swap(CPUArchState *env, DisasContextBase 
*db,
  return ret;
  }
  
+/**

+ * translator_fake_ldw - fake instruction load
+ * @insn16: 2 byte instruction
+ * @pc: program counter of instruction
+ *
+ * This is a special case helper used where the instruction we are
+ * about to translate comes from somewhere else (e.g. being
+ * re-synthesised for s390x "ex"). It ensures we update other areas of
+ * the translator with details of the executed instruction.
+ */
+
+static inline void translator_fake_ldw(uint16_t insn16, abi_ptr pc)
+{
+plugin_insn_append(pc, &insn16, sizeof(insn16));
+}
+
+
  /*
   * Return whether addr is on the same page as where disassembly started.
   * Translators can use this to enforce the rule that only single-insn
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 1d2dddab1c..a07b8b2d23 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -6317,12 +6317,16 @@ static const DisasInsn *extract_insn(CPUS390XState 
*env, DisasContext *s)
  if (unlikely(s->ex_value)) {
  /* Drop the EX data now, so that it's clear on exception paths.  */
  TCGv_i64 zero = tcg_const_i64(0);
+int i;
  tcg_gen_st_i64(zero, cpu_env, offsetof(CPUS390XState, ex_value));
  tcg_temp_free_i64(zero);
  
  /* Extract the values saved by EXECUTE.  */

insn = s->ex_value & 0xffffffffffff0000ull;
  ilen = s->ex_value & 0xf;
+for (i = 0; i < ilen; i += 2) {


Is it worth guarding with #ifdef CONFIG_PLUGIN?


+translator_fake_ldw(extract64(insn, 48 - (i * 8), 16), pc + i);
+}
  op = insn >> 56;
  } else {
  insn = ld_code2(env, s, pc);
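
For what it's worth, the arithmetic of that loop is easy to check in
isolation: the synthesised instruction sits left-aligned in the top bytes of
ex_value, so each iteration peels off one halfword and attributes it to
pc + i. A small standalone illustration (the sample opcode is just an
example, not taken from the patch):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* local stand-in for QEMU's extract64(value, start, length) */
static uint64_t extract64(uint64_t value, int start, int length)
{
    return (value >> start) & (~0ULL >> (64 - length));
}

int main(void)
{
    /* assume EXECUTE synthesised the 4-byte insn 0xb9040012 (lgr %r1,%r2) */
    uint64_t ex_value = (0xb9040012ULL << 32) | 4; /* insn bytes + ilen */
    uint64_t insn = ex_value & 0xffffffffffff0000ULL;
    int ilen = ex_value & 0xf;
    int i;

    for (i = 0; i < ilen; i += 2) {
        printf("halfword for pc+%d: 0x%04" PRIx64 "\n",
               i, extract64(insn, 48 - (i * 8), 16));
    }
    /* prints 0xb904 for pc+0, then 0x0012 for pc+2 */
    return 0;
}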





Re: [PATCH 1/4] target/i386: decode-new: avoid out-of-bounds access to xmm_regs[-1]

2022-10-19 Thread Philippe Mathieu-Daudé

On 19/10/22 17:06, Paolo Bonzini wrote:

If the destination is a memory register, op->n is -1.  Going through
tcg_gen_gvec_dup_imm path is both useless (the value has been stored
by the gen_* function already) and wrong because of the out-of-bounds
access.

Signed-off-by: Paolo Bonzini 
---
  target/i386/tcg/emit.c.inc | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 27eca591a9..ebf299451d 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -296,7 +296,7 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn 
*decode, int opn, TCGv
  case X86_OP_MMX:
  break;
  case X86_OP_SSE:
-if ((s->prefix & PREFIX_VEX) && op->ot == MO_128) {
+if (!op->has_ea && (s->prefix & PREFIX_VEX) && op->ot == MO_128) {
  tcg_gen_gvec_dup_imm(MO_64,
   offsetof(CPUX86State, 
xmm_regs[op->n].ZMM_X(1)),
   16, 16, 0);


Fixes: 20581aadec ("target/i386: validate VEX prefixes via the 
instructions' exception classes")


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH] hw/acpi/erst.c: Fix memset argument order

2022-10-19 Thread Eric DeVolder




On 10/19/22 14:37, Philippe Mathieu-Daudé wrote:

On 19/10/22 21:15, Christian A. Ehrhardt wrote:

Fix memset argument order: The second argument is
the value, the length goes last.

Cc: Eric DeVolder 
Cc: qemu-sta...@nongnu.org
Fixes: f7e26ffa590 ("ACPI ERST: support for ACPI ERST feature")
Signed-off-by: Christian A. Ehrhardt 
---
  hw/acpi/erst.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/acpi/erst.c b/hw/acpi/erst.c
index df856b2669..26391f93ca 100644
--- a/hw/acpi/erst.c
+++ b/hw/acpi/erst.c
@@ -716,7 +716,7 @@ static unsigned write_erst_record(ERSTDeviceState *s)
  if (nvram) {
  /* Write the record into the slot */
  memcpy(nvram, exchange, record_length);
-    memset(nvram + record_length, exchange_length - record_length, 0xFF);
+    memset(nvram + record_length, 0xFF, exchange_length - record_length);

Ouch

Sheesh, I'd hate to be that guy...

Reviewed-by: Eric DeVolder 


Reviewed-by: Philippe Mathieu-Daudé 





Re: [PATCH 2/4] target/i386: introduce function to set rounding mode from FPCW or MXCSR bits

2022-10-19 Thread Philippe Mathieu-Daudé

On 19/10/22 17:06, Paolo Bonzini wrote:

VROUND, FSTCW and STMXCSR all have to perform the same conversion from
x86 rounding modes to softfloat constants.  Since the ISA is consistent
on the meaning of the two-bit rounding modes, extract the common code
into a wrapper for set_float_rounding_mode.

Signed-off-by: Paolo Bonzini 
---
  target/i386/ops_sse.h| 60 +++-
  target/i386/tcg/fpu_helper.c | 60 +---
  2 files changed, 25 insertions(+), 95 deletions(-)



diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index a6a90a1817..6f3741b635 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -32,7 +32,8 @@
  #define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
  #define ST1ST(1)
  
-#define FPU_RC_MASK 0xc00

+#define FPU_RC_SHIFT10
+#define FPU_RC_MASK (3 << FPU_RC_SHIFT)
  #define FPU_RC_NEAR 0x000
  #define FPU_RC_DOWN 0x400
  #define FPU_RC_UP   0x800
@@ -685,28 +686,26 @@ uint32_t helper_fnstcw(CPUX86State *env)
  return env->fpuc;
  }
  
+static void set_x86_rounding_mode(unsigned mode, float_status *status)

+{
+static FloatRoundMode x86_round_mode[4] = {


static const, otherwise:

Reviewed-by: Philippe Mathieu-Daudé 


+float_round_nearest_even,
+float_round_down,
+float_round_up,
+float_round_to_zero
+};
+assert(mode < ARRAY_SIZE(x86_round_mode));
+set_float_rounding_mode(x86_round_mode[mode], status);
+}
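
For reference, the callers then just feed the two-bit RC field straight in,
roughly like this (a sketch, not the literal hunks -- note MXCSR keeps its
RC field at bits 14:13):

/* x87: rounding control lives in FPCW bits 11:10 */
set_x86_rounding_mode((env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT,
                      &env->fp_status);

/* SSE: rounding control lives in MXCSR bits 14:13 */
set_x86_rounding_mode((env->mxcsr >> 13) & 3, &env->sse_status);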




Re: [PATCH] hw/acpi/erst.c: Fix memset argument order

2022-10-19 Thread Philippe Mathieu-Daudé

On 19/10/22 21:15, Christian A. Ehrhardt wrote:

Fix memset argument order: The second argument is
the value, the length goes last.

Cc: Eric DeVolder 
Cc: qemu-sta...@nongnu.org
Fixes: f7e26ffa590 ("ACPI ERST: support for ACPI ERST feature")
Signed-off-by: Christian A. Ehrhardt 
---
  hw/acpi/erst.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/acpi/erst.c b/hw/acpi/erst.c
index df856b2669..26391f93ca 100644
--- a/hw/acpi/erst.c
+++ b/hw/acpi/erst.c
@@ -716,7 +716,7 @@ static unsigned write_erst_record(ERSTDeviceState *s)
  if (nvram) {
  /* Write the record into the slot */
  memcpy(nvram, exchange, record_length);
-memset(nvram + record_length, exchange_length - record_length, 0xFF);
+memset(nvram + record_length, 0xFF, exchange_length - record_length);

Ouch

Reviewed-by: Philippe Mathieu-Daudé 




Re: [RESEND PATCH] hw/mem/nvdimm: fix error message for 'unarmed' flag

2022-10-19 Thread Julia Suvorova
On Tue, Oct 18, 2022 at 6:49 PM Michael S. Tsirkin  wrote:
>
> On Tue, Oct 18, 2022 at 06:17:55PM +0200, Philippe Mathieu-Daudé wrote:
> > On 18/10/22 17:25, Julia Suvorova wrote:
> > > In the ACPI specification [1], the 'unarmed' bit is set when a device
> > > cannot accept a persistent write. This means that when a memdev is
> > > read-only, the 'unarmed' flag must be turned on. The logic is correct,
> > > just changing the error message.
> > >
> > > [1] ACPI NFIT NVDIMM Region Mapping Structure "NVDIMM State Flags" Bit 3
> > >
> >
> > Fixes: dbd730e859 ("nvdimm: check -object memory-backend-file, readonly=on
> > option")
> >
> > The documentation in 'docs/nvdimm.txt' is correct :)
> >
> > > Signed-off-by: Julia Suvorova 
> > > Reviewed-by: Stefan Hajnoczi 
> > > ---
> > >   hw/mem/nvdimm.c | 2 +-
> > >   1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
> > > index 7c7d81..bfb76818c1 100644
> > > --- a/hw/mem/nvdimm.c
> > > +++ b/hw/mem/nvdimm.c
> > > @@ -149,7 +149,7 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice 
> > > *nvdimm, Error **errp)
> > >   if (!nvdimm->unarmed && memory_region_is_rom(mr)) {
> > >   HostMemoryBackend *hostmem = dimm->hostmem;
> > > -error_setg(errp, "'unarmed' property must be off since memdev %s 
> > > "
> > > +error_setg(errp, "'unarmed' property must be on since memdev %s "
> >
> > If you ever respin please quote 'on' for readability.
>
>
> Yes make sense. Julia could you change this pls?

Sure, will do.

> > >  "is read-only",
> > >  
> > > object_get_canonical_path_component(OBJECT(hostmem)));
> > >   return;
> >
> > Reviewed-by: Philippe Mathieu-Daudé 
>




[PATCH] hw/acpi/erst.c: Fix memset argument order

2022-10-19 Thread Christian A. Ehrhardt
Fix memset argument order: The second argument is
the value, the length goes last.

Cc: Eric DeVolder 
Cc: qemu-sta...@nongnu.org
Fixes: f7e26ffa590 ("ACPI ERST: support for ACPI ERST feature")
Signed-off-by: Christian A. Ehrhardt 
---
 hw/acpi/erst.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/acpi/erst.c b/hw/acpi/erst.c
index df856b2669..26391f93ca 100644
--- a/hw/acpi/erst.c
+++ b/hw/acpi/erst.c
@@ -716,7 +716,7 @@ static unsigned write_erst_record(ERSTDeviceState *s)
 if (nvram) {
 /* Write the record into the slot */
 memcpy(nvram, exchange, record_length);
-memset(nvram + record_length, exchange_length - record_length, 0xFF);
+memset(nvram + record_length, 0xFF, exchange_length - record_length);
 /* If a new record, increment the record_count */
 if (!record_found) {
 uint32_t record_count;
-- 
2.34.1




[RFC PATCH] tests/avocado: set -machine none for userfwd and vnc tests

2022-10-19 Thread Alex Bennée
These tests exercise core QEMU features and don't actually run any guest
code. Not specifying a machine causes a failure when avocado chooses the
native arch binary to run them. Be explicit.

Signed-off-by: Alex Bennée 
---
 tests/avocado/info_usernet.py | 3 +++
 tests/avocado/vnc.py  | 1 +
 2 files changed, 4 insertions(+)

diff --git a/tests/avocado/info_usernet.py b/tests/avocado/info_usernet.py
index b862a47dba..fdc4d90c42 100644
--- a/tests/avocado/info_usernet.py
+++ b/tests/avocado/info_usernet.py
@@ -14,6 +14,9 @@
 
 
 class InfoUsernet(QemuSystemTest):
+"""
+:avocado: tags=machine:none
+"""
 
 def test_hostfwd(self):
 self.require_netdev('user')
diff --git a/tests/avocado/vnc.py b/tests/avocado/vnc.py
index 187fd3febc..aeeefc70be 100644
--- a/tests/avocado/vnc.py
+++ b/tests/avocado/vnc.py
@@ -53,6 +53,7 @@ def find_free_ports(count: int) -> List[int]:
 class Vnc(QemuSystemTest):
 """
 :avocado: tags=vnc,quick
+:avocado: tags=machine:none
 """
 def test_no_vnc(self):
 self.vm.add_args('-nodefaults', '-S')
-- 
2.34.1




Re: [PATCH v8 5/8] KVM: Register/unregister the guest private memory regions

2022-10-19 Thread Fuad Tabba
On Wed, Oct 19, 2022 at 5:09 PM Sean Christopherson  wrote:
>
> On Wed, Oct 19, 2022, Fuad Tabba wrote:
> > > > > This sounds good. Thank you.
> > > >
> > > > I like the idea of a separate Kconfig, e.g. 
> > > > CONFIG_KVM_GENERIC_PRIVATE_MEM or
> > > > something.  I highly doubt there will be any non-x86 users for multiple 
> > > > years,
> > > > if ever, but it would allow testing the private memory stuff on ARM 
> > > > (and any other
> > > > non-x86 arch) without needing full pKVM support and with only minor KVM
> > > > modifications, e.g. the x86 support[*] to test UPM without TDX is 
> > > > shaping up to be
> > > > trivial.
> > >
> > > CONFIG_KVM_GENERIC_PRIVATE_MEM looks good to me.
> >
> > That sounds good to me, and just keeping the xarray isn't really an
> > issue for pKVM.
>
> The xarray won't exist for pKVM if the #ifdefs in this patch are changed from
> CONFIG_HAVE_KVM_PRIVATE_MEM => CONFIG_KVM_GENERIC_PRIVATE_MEM.
>
> > We could end up using it instead of some of the other
> > structures we use for tracking.
>
> I don't think pKVM should hijack the xarray for other purposes.  At best, it 
> will
> be confusing, at worst we'll end up with a mess if ARM ever supports the 
> "generic"
> implementation.

Definitely wasn't referring to hijacking it for other purposes, which
is the main reason I wanted to clarify the documentation and the
naming of private_fd. Anyway, I'm glad to see that we're in agreement.
Once I've tightened the screws, I'll share the pKVM port as an RFC as
well.

Cheers,
/fuad



Re: [PATCH v10 1/9] s390x/cpu topology: core_id sets s390x CPU topology

2022-10-19 Thread Janis Schoetterl-Glausch
On Wed, 2022-10-19 at 17:39 +0200, Pierre Morel wrote:
> 
> On 10/18/22 18:43, Cédric Le Goater wrote:
[...]
> > 
> > > diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
> > > new file mode 100644
> > > index 00..42b22a1831
> > > --- /dev/null
> > > +++ b/hw/s390x/cpu-topology.c
> > > @@ -0,0 +1,132 @@
> > > +/*
> > > + * CPU Topology
> > > + *
> > > + * Copyright IBM Corp. 2022
> > 
> > The Copyright tag is different in the .h file.
> 
> OK, I change this to be like in the header file it seems to be the most 
> used format.
> 
No, this form, with the date at the end, is the correct one.
> 

[...]



Re: socket chardevs: data loss when other end closes connection?

2022-10-19 Thread Daniel P . Berrangé
On Wed, Oct 19, 2022 at 05:26:28PM +0100, Peter Maydell wrote:
> On Tue, 18 Oct 2022 at 20:21, Daniel P. Berrangé  wrote:
> >
> > On Tue, Oct 18, 2022 at 06:55:08PM +0100, Peter Maydell wrote:
> > > How is this intended to work? I guess the socket ought to go
> > > into some kind of "disconnecting" state, but not actually do
> > > a tcp_chr_disconnect() until all the data has been read via
> > > tcp_chr_read() and it's finally got an EOF indication back from
> > > tcp_chr_recv() ?
> >
> > Right, this is basically broken by (lack of) design right now.
> >
> > The main problem here is that we're watching the socket twice.
> > One set of callbacks added with io_add_watch_poll, and then
> > a second callback added with qio_chanel_create_watch just for
> > G_IO_HUP.
> >
> > We need there to be only 1 callback, and when that callback
> > gets  G_IO_IN, it should *ignore* G_IO_HUP until tcp_chr_recv
> > returns 0 to indicate EOF. This would cause tcp_chr_read to
> > be invoked repeatedly with G_IO_IN | G_IO_HUP, as we read
> > "halt\r" one byte at a time.
> 
> Makes sense.
> 
> I've filed https://gitlab.com/qemu-project/qemu/-/issues/1264 to
> track this socket chardev bug.
> 
> It did occur to me that there's a potential complication with
> the 'server' mode of this chardev: does it need to cope with
> a new connection coming into the server socket while the old
> fd is still hanging around in this "waiting for the guest to
> read it" state? Currently tcp_chr_disconnect_locked() is where
> we restart listening for new connections, so QEMU wouldn't
> accept any new connection until the guest had got round to
> completely draining the data from the old one.

That's fine IMHO. We never actually stop listening at a socket
level, we just stop trying to accept(). So any new client will
get queued until we've drained data, then accept()d and its
new data handled
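
To illustrate the drain-before-disconnect ordering described above with plain
poll()/read() rather than the real chardev/QIOChannel code (so purely a
sketch of the principle, not of the eventual fix): keep delivering while data
is still queued, and only treat the hangup as a disconnect once read()
reports EOF.

#include <poll.h>
#include <sys/types.h>
#include <unistd.h>

static void drain_then_disconnect(int fd,
                                  void (*deliver)(const char *buf, ssize_t len),
                                  void (*disconnect)(void))
{
    struct pollfd pfd = { .fd = fd, .events = POLLIN };
    char buf[1]; /* mimic the guest draining one byte at a time */

    for (;;) {
        poll(&pfd, 1, -1);
        if (pfd.revents & (POLLIN | POLLHUP)) {
            ssize_t n = read(fd, buf, sizeof(buf));
            if (n > 0) {
                deliver(buf, n);   /* data still queued: the HUP is ignored */
                continue;
            }
            if (n == 0) {
                disconnect();      /* EOF: only now tear the connection down */
                return;
            }
        }
    }
}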

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: socket chardevs: data loss when other end closes connection?

2022-10-19 Thread Peter Maydell
On Tue, 18 Oct 2022 at 20:21, Daniel P. Berrangé  wrote:
>
> On Tue, Oct 18, 2022 at 06:55:08PM +0100, Peter Maydell wrote:
> > How is this intended to work? I guess the socket ought to go
> > into some kind of "disconnecting" state, but not actually do
> > a tcp_chr_disconnect() until all the data has been read via
> > tcp_chr_read() and it's finally got an EOF indication back from
> > tcp_chr_recv() ?
>
> Right, this is basically broken by (lack of) design right now.
>
> The main problem here is that we're watching the socket twice.
> One set of callbacks added with io_add_watch_poll, and then
> a second callback added with qio_chanel_create_watch just for
> G_IO_HUP.
>
> We need there to be only 1 callback, and when that callback
> gets  G_IO_IN, it should *ignore* G_IO_HUP until tcp_chr_recv
> returns 0 to indicate EOF. This would cause tcp_chr_read to
> be invoked repeatedly with G_IO_IN | G_IO_HUP, as we read
> "halt\r" one byte at a time.

Makes sense.

I've filed https://gitlab.com/qemu-project/qemu/-/issues/1264 to
track this socket chardev bug.

It did occur to me that there's a potential complication with
the 'server' mode of this chardev: does it need to cope with
a new connection coming into the server socket while the old
fd is still hanging around in this "waiting for the guest to
read it" state? Currently tcp_chr_disconnect_locked() is where
we restart listening for new connections, so QEMU wouldn't
accept any new connection until the guest had got round to
completely draining the data from the old one.

-- PMM



Re: [PATCH v1 09/12] accel/xen/xen-all: export xenstore_record_dm_state

2022-10-19 Thread Paul Durrant

On 15/10/2022 06:07, Vikram Garhwal wrote:

xenstore_record_dm_state() will also be used in aarch64 xenpv machine.

Signed-off-by: Vikram Garhwal 
Signed-off-by: Stefano Stabellini 


Reviewed-by: Paul Durrant 




Re: [PATCH v1 06/12] xen-hvm: move common functions to hw/xen/xen-hvm-common.c

2022-10-19 Thread Paul Durrant

On 15/10/2022 06:07, Vikram Garhwal wrote:
[snip]

+qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);
+
+state->memory_listener = xen_memory_listener;
+memory_listener_register(&state->memory_listener, &address_space_memory);
+
+state->io_listener = xen_io_listener;
+memory_listener_register(&state->io_listener, &address_space_io);
+
+state->device_listener = xen_device_listener;
+QLIST_INIT(&state->dev_list);
+device_listener_register(&state->device_listener);
+


As Julien said, these do not belong here. These are the (current and 
legacy) PV backend setup functions; they most certainly have nothing to 
do with device emulation.


  Paul




Re: [PATCH v1 05/12] hw/i386/xen/xen-hvm: create arch_handle_ioreq and arch_xen_set_memory

2022-10-19 Thread Paul Durrant

On 15/10/2022 06:07, Vikram Garhwal wrote:

From: Stefano Stabellini 

In preparation to moving most of xen-hvm code to an arch-neutral location,
move the x86-specific portion of xen_set_memory to arch_xen_set_memory.

Also move handle_vmport_ioreq to arch_handle_ioreq.

NOTE: This patch breaks the build. Next patch fixes the build issue.
Reason behind creating this patch is because there is lot of new code addition
and pure code movement done for enabling Xen on ARM. Keeping the this patch
separate is done to make it easier to review.

Signed-off-by: Stefano Stabellini 
Signed-off-by: Vikram Garhwal 


Reviewed-by: Paul Durrant 




Re: [PATCH v8 5/8] KVM: Register/unregister the guest private memory regions

2022-10-19 Thread Sean Christopherson
On Wed, Oct 19, 2022, Fuad Tabba wrote:
> > > > This sounds good. Thank you.
> > >
> > > I like the idea of a separate Kconfig, e.g. 
> > > CONFIG_KVM_GENERIC_PRIVATE_MEM or
> > > something.  I highly doubt there will be any non-x86 users for multiple 
> > > years,
> > > if ever, but it would allow testing the private memory stuff on ARM (and 
> > > any other
> > > non-x86 arch) without needing full pKVM support and with only minor KVM
> > > modifications, e.g. the x86 support[*] to test UPM without TDX is shaping 
> > > up to be
> > > trivial.
> >
> > CONFIG_KVM_GENERIC_PRIVATE_MEM looks good to me.
> 
> That sounds good to me, and just keeping the xarray isn't really an
> issue for pKVM.

The xarray won't exist for pKVM if the #ifdefs in this patch are changed from
CONFIG_HAVE_KVM_PRIVATE_MEM => CONFIG_KVM_GENERIC_PRIVATE_MEM.

> We could end up using it instead of some of the other
> structures we use for tracking.

I don't think pKVM should hijack the xarray for other purposes.  At best, it 
will
be confusing, at worst we'll end up with a mess if ARM ever supports the 
"generic"
implementation.  
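
In Kconfig terms the split being discussed would roughly look like this
(illustrative only, option names as proposed above):

config HAVE_KVM_PRIVATE_MEM
       bool

config KVM_GENERIC_PRIVATE_MEM
       bool
       select HAVE_KVM_PRIVATE_MEM

x86 would then select KVM_GENERIC_PRIVATE_MEM and get the xarray-based
tracking with it, while a pKVM port could select only HAVE_KVM_PRIVATE_MEM
and keep its own tracking structures.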



[PATCH v7 4/8] ppc4xx_sdram: Use hwaddr for memory bank size

2022-10-19 Thread BALATON Zoltan
This resolves the target_ulong dependency that's clearly wrong and was
also noted in a fixme comment.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Philippe Mathieu-Daudé 
---
 hw/ppc/ppc4xx_sdram.c | 14 --
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/hw/ppc/ppc4xx_sdram.c b/hw/ppc/ppc4xx_sdram.c
index 62ef7d8f0d..2294747594 100644
--- a/hw/ppc/ppc4xx_sdram.c
+++ b/hw/ppc/ppc4xx_sdram.c
@@ -34,7 +34,6 @@
 #include "qapi/error.h"
 #include "qemu/log.h"
 #include "exec/address-spaces.h" /* get_system_memory() */
-#include "exec/cpu-defs.h" /* target_ulong */
 #include "hw/irq.h"
 #include "hw/qdev-properties.h"
 #include "hw/ppc/ppc4xx.h"
@@ -126,11 +125,6 @@ enum {
 
 /*/
 /* DDR SDRAM controller */
-/*
- * XXX: TOFIX: some patches have made this code become inconsistent:
- *  there are type inconsistencies, mixing hwaddr, target_ulong
- *  and uint32_t
- */
 static uint32_t sdram_ddr_bcr(hwaddr ram_base, hwaddr ram_size)
 {
 uint32_t bcr;
@@ -174,9 +168,9 @@ static inline hwaddr sdram_ddr_base(uint32_t bcr)
 return bcr & 0xFF80;
 }
 
-static target_ulong sdram_ddr_size(uint32_t bcr)
+static hwaddr sdram_ddr_size(uint32_t bcr)
 {
-target_ulong size;
+hwaddr size;
 int sh;
 
 sh = (bcr >> 17) & 0x7;
@@ -523,9 +517,9 @@ static inline hwaddr sdram_ddr2_base(uint32_t bcr)
 return (bcr & 0xffe0) << 2;
 }
 
-static uint64_t sdram_ddr2_size(uint32_t bcr)
+static hwaddr sdram_ddr2_size(uint32_t bcr)
 {
-uint64_t size;
+hwaddr size;
 int sh;
 
 sh = 1024 - ((bcr >> 6) & 0x3ff);
-- 
2.30.4




Re: [PATCH v5 03/10] acpi/tests/avocado/bits: disable acpi PSS tests that are failing in biosbits

2022-10-19 Thread Daniel P . Berrangé
On Wed, Oct 19, 2022 at 04:30:57PM +0100, Alex Bennée wrote:
> 
> Ani Sinha  writes:
> 
> > PSS tests in acpi test suite seems to be failing in biosbits. This is 
> > because
> > the test is unable to find PSS support in QEMU bios. Let us disable
> > them for now so that make check does not fail. We can fix the tests and
> > re-enable them later.
> >
> > Example failure:
> >
> >  ACPI _PSS (Pstate) table conformance tests 
> > [assert] _PSS must exist FAIL
> >   \_SB_.CPUS.C000
> >   No _PSS exists
> > Summary: 1 passed, 1 failed
> >  ACPI _PSS (Pstate) runtime tests 
> > [assert] _PSS must exist FAIL
> >   \_SB_.CPUS.C000
> >   No _PSS exists
> > Summary: 0 passed, 1 failed
> >
> > Cc: Daniel P. Berrangé 
> > Cc: Paolo Bonzini 
> > Cc: Maydell Peter 
> > Cc: John Snow 
> > Cc: Thomas Huth 
> > Cc: Alex Bennée 
> > Cc: Igor Mammedov 
> > Cc: Michael Tsirkin 
> > Signed-off-by: Ani Sinha 
> > ---
> >  tests/avocado/acpi-bits/bits-tests/testacpi.py2 | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/tests/avocado/acpi-bits/bits-tests/testacpi.py2 
> > b/tests/avocado/acpi-bits/bits-tests/testacpi.py2
> > index 18dc818d62..f818a9cce6 100644
> > --- a/tests/avocado/acpi-bits/bits-tests/testacpi.py2
> > +++ b/tests/avocado/acpi-bits/bits-tests/testacpi.py2
> > @@ -40,8 +40,8 @@ import time
> >  
> >  def register_tests():
> >  testsuite.add_test("ACPI _MAT (Multiple APIC Table Entry) under 
> > Processor objects", test_mat, submenu="ACPI Tests")
> > -testsuite.add_test("ACPI _PSS (Pstate) table conformance tests", 
> > test_pss, submenu="ACPI Tests")
> > -testsuite.add_test("ACPI _PSS (Pstate) runtime tests", test_pstates, 
> > submenu="ACPI Tests")
> > +#testsuite.add_test("ACPI _PSS (Pstate) table conformance tests", 
> > test_pss, submenu="ACPI Tests")
> > +#testsuite.add_test("ACPI _PSS (Pstate) runtime tests", test_pstates, 
> > submenu="ACPI Tests")
> >  testsuite.add_test("ACPI DSDT (Differentiated System Description 
> > Table)", test_dsdt, submenu="ACPI Tests")
> >  testsuite.add_test("ACPI FACP (Fixed ACPI Description Table)", 
> > test_facp, submenu="ACPI Tests")
> >  testsuite.add_test("ACPI HPET (High Precision Event Timer Table)", 
> > test_hpet, submenu="ACPI Tests")
> 
> I think this breaks bisection so should probably be included in the
> commit that add the test with a comment in the commit message.

IIUC, this is all unreachable code until Patch 08 adds the avocado
test providing the entry point, so bisect ought to be safe.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v2 03/11] migration: Make migration json writer part of MigrationState struct

2022-10-19 Thread Daniel P . Berrangé
On Wed, Oct 19, 2022 at 06:43:46PM +0300, Nikolay Borisov wrote:
> 
> 
> On 18.10.22 г. 13:06 ч., Daniel P. Berrangé wrote:
> > On Mon, Oct 10, 2022 at 04:34:00PM +0300, Nikolay Borisov wrote:
> > > This is required so that migration stream configuration is written
> > > to the migration stream. This would allow analyze-migration to
> > > parse enabled capabilities for the migration and adjust its behavior
> > > accordingly. This is in preparation for analyze-migration.py to support
> > > 'fixed-ram' capability format changes.
> > > 
> > > Signed-off-by: Nikolay Borisov 
> > > ---
> > >   migration/migration.c |  5 +
> > >   migration/migration.h |  3 +++
> > >   migration/savevm.c| 38 ++
> > >   3 files changed, 30 insertions(+), 16 deletions(-)
> > > 
> > > diff --git a/migration/migration.c b/migration/migration.c
> > > index 140b0f1a54bd..d0779bbaf862 100644
> > > --- a/migration/migration.c
> > > +++ b/migration/migration.c
> > > @@ -1896,6 +1896,8 @@ static void migrate_fd_cleanup(MigrationState *s)
> > >   g_free(s->hostname);
> > >   s->hostname = NULL;
> > > +json_writer_free(s->vmdesc);
> > > +
> > >   qemu_savevm_state_cleanup();
> > >   if (s->to_dst_file) {
> > > @@ -2154,6 +2156,7 @@ void migrate_init(MigrationState *s)
> > >   error_free(s->error);
> > >   s->error = NULL;
> > >   s->hostname = NULL;
> > > +s->vmdesc = NULL;
> > >   migrate_set_state(>state, MIGRATION_STATUS_NONE, 
> > > MIGRATION_STATUS_SETUP);
> > > @@ -4269,6 +4272,8 @@ void migrate_fd_connect(MigrationState *s, Error 
> > > *error_in)
> > >   return;
> > >   }
> > > +s->vmdesc = json_writer_new(false);
> > > +
> > >   if (multifd_save_setup(_err) != 0) {
> > >   error_report_err(local_err);
> > >   migrate_set_state(>state, MIGRATION_STATUS_SETUP,
> > > diff --git a/migration/migration.h b/migration/migration.h
> > > index cdad8aceaaab..96f27aba2210 100644
> > > --- a/migration/migration.h
> > > +++ b/migration/migration.h
> > > @@ -17,6 +17,7 @@
> > >   #include "exec/cpu-common.h"
> > >   #include "hw/qdev-core.h"
> > >   #include "qapi/qapi-types-migration.h"
> > > +#include "qapi/qmp/json-writer.h"
> > >   #include "qemu/thread.h"
> > >   #include "qemu/coroutine_int.h"
> > >   #include "io/channel.h"
> > > @@ -261,6 +262,8 @@ struct MigrationState {
> > >   int state;
> > > +JSONWriter *vmdesc;
> > > +
> > >   /* State related to return path */
> > >   struct {
> > >   /* Protected by qemu_file_lock */
> > > diff --git a/migration/savevm.c b/migration/savevm.c
> > > index 48e85c052c2c..174cdbefc29d 100644
> > > --- a/migration/savevm.c
> > > +++ b/migration/savevm.c
> > > @@ -1137,13 +1137,18 @@ void qemu_savevm_non_migratable_list(strList 
> > > **reasons)
> > >   void qemu_savevm_state_header(QEMUFile *f)
> > >   {
> > > +MigrationState *s = migrate_get_current();
> > >   trace_savevm_state_header();
> > >   qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
> > >   qemu_put_be32(f, QEMU_VM_FILE_VERSION);
> > > -if (migrate_get_current()->send_configuration) {
> > > +if (s->send_configuration) {
> > >   qemu_put_byte(f, QEMU_VM_CONFIGURATION);
> > > -vmstate_save_state(f, _configuration, _state, 0);
> > > + json_writer_start_object(s->vmdesc, NULL);
> > > + json_writer_start_object(s->vmdesc, "configuration");
> > > +vmstate_save_state(f, _configuration, _state, 
> > > s->vmdesc);
> > > + json_writer_end_object(s->vmdesc);
> > > +
> > 
> > IIUC, this is changing the info that is written in the VM
> > configuration section, by adding an extra level of nesting
> > to the object.
> > 
> > Isn't this going to cause backwards compatibility problems ?
> > 
> > Nothing in the patch seems to take account of the exctra
> > 'configuiration' object that has been started
> 
> The resulting json looks like:
> 
> {
> "configuration": {
> "vmsd_name": "configuration",
> "version": 1,
> "fields": [
> {
> "name": "len",
> "type": "uint32",
> "size": 4
> },
> {
> "name": "name",
> "type": "buffer",
> "size": 13
> }
> ],
> "subsections": [
> {
> "vmsd_name": "configuration/capabilities",
> "version": 1,
> "fields": [
> {
> "name": "caps_count",
> "type": "uint32",
> "size": 4
> },
> {
> "name": "capabilities",
> "type": "capability",
> "size": 10
> }
> ]
> }
> ]
> },
> "page_size": 4096,
> "devices": [
> {
> "name": "timer",
> 

[PATCH v7 2/8] ppc4xx_devs.c: Move DDR SDRAM controller model to ppc4xx_sdram.c

2022-10-19 Thread BALATON Zoltan
Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc4xx_devs.c  | 352 
 hw/ppc/ppc4xx_sdram.c | 365 ++
 2 files changed, 365 insertions(+), 352 deletions(-)

diff --git a/hw/ppc/ppc4xx_devs.c b/hw/ppc/ppc4xx_devs.c
index 12af90f244..f737dbb3d6 100644
--- a/hw/ppc/ppc4xx_devs.c
+++ b/hw/ppc/ppc4xx_devs.c
@@ -24,357 +24,10 @@
 
 #include "qemu/osdep.h"
 #include "qemu/units.h"
-#include "sysemu/reset.h"
 #include "cpu.h"
-#include "hw/irq.h"
-#include "hw/ppc/ppc.h"
 #include "hw/ppc/ppc4xx.h"
 #include "hw/qdev-properties.h"
-#include "qemu/log.h"
-#include "exec/address-spaces.h"
-#include "qemu/error-report.h"
 #include "qapi/error.h"
-#include "trace.h"
-
-/*/
-/* SDRAM controller */
-enum {
-SDRAM0_CFGADDR = 0x010,
-SDRAM0_CFGDATA = 0x011,
-};
-
-/*
- * XXX: TOFIX: some patches have made this code become inconsistent:
- *  there are type inconsistencies, mixing hwaddr, target_ulong
- *  and uint32_t
- */
-static uint32_t sdram_ddr_bcr(hwaddr ram_base, hwaddr ram_size)
-{
-uint32_t bcr;
-
-switch (ram_size) {
-case 4 * MiB:
-bcr = 0;
-break;
-case 8 * MiB:
-bcr = 0x2;
-break;
-case 16 * MiB:
-bcr = 0x4;
-break;
-case 32 * MiB:
-bcr = 0x6;
-break;
-case 64 * MiB:
-bcr = 0x8;
-break;
-case 128 * MiB:
-bcr = 0xA;
-break;
-case 256 * MiB:
-bcr = 0xC;
-break;
-default:
-qemu_log_mask(LOG_GUEST_ERROR,
-  "%s: invalid RAM size 0x%" HWADDR_PRIx "\n", __func__,
-  ram_size);
-return 0;
-}
-bcr |= ram_base & 0xFF80;
-bcr |= 1;
-
-return bcr;
-}
-
-static inline hwaddr sdram_ddr_base(uint32_t bcr)
-{
-return bcr & 0xFF80;
-}
-
-static target_ulong sdram_ddr_size(uint32_t bcr)
-{
-target_ulong size;
-int sh;
-
-sh = (bcr >> 17) & 0x7;
-if (sh == 7) {
-size = -1;
-} else {
-size = (4 * MiB) << sh;
-}
-
-return size;
-}
-
-static void sdram_ddr_set_bcr(Ppc4xxSdramDdrState *sdram, int i,
-  uint32_t bcr, int enabled)
-{
-if (sdram->bank[i].bcr & 1) {
-/* Unmap RAM */
-trace_ppc4xx_sdram_unmap(sdram_ddr_base(sdram->bank[i].bcr),
- sdram_ddr_size(sdram->bank[i].bcr));
-memory_region_del_subregion(get_system_memory(),
->bank[i].container);
-memory_region_del_subregion(>bank[i].container,
->bank[i].ram);
-object_unparent(OBJECT(>bank[i].container));
-}
-sdram->bank[i].bcr = bcr & 0xFFDEE001;
-if (enabled && (bcr & 1)) {
-trace_ppc4xx_sdram_map(sdram_ddr_base(bcr), sdram_ddr_size(bcr));
-memory_region_init(>bank[i].container, NULL, "sdram-container",
-   sdram_ddr_size(bcr));
-memory_region_add_subregion(>bank[i].container, 0,
->bank[i].ram);
-memory_region_add_subregion(get_system_memory(),
-sdram_ddr_base(bcr),
->bank[i].container);
-}
-}
-
-static void sdram_ddr_map_bcr(Ppc4xxSdramDdrState *sdram)
-{
-int i;
-
-for (i = 0; i < sdram->nbanks; i++) {
-if (sdram->bank[i].size != 0) {
-sdram_ddr_set_bcr(sdram, i, sdram_ddr_bcr(sdram->bank[i].base,
-  sdram->bank[i].size), 1);
-} else {
-sdram_ddr_set_bcr(sdram, i, 0, 0);
-}
-}
-}
-
-static void sdram_ddr_unmap_bcr(Ppc4xxSdramDdrState *sdram)
-{
-int i;
-
-for (i = 0; i < sdram->nbanks; i++) {
-trace_ppc4xx_sdram_unmap(sdram_ddr_base(sdram->bank[i].bcr),
- sdram_ddr_size(sdram->bank[i].bcr));
-memory_region_del_subregion(get_system_memory(),
->bank[i].ram);
-}
-}
-
-static uint32_t sdram_ddr_dcr_read(void *opaque, int dcrn)
-{
-Ppc4xxSdramDdrState *sdram = opaque;
-uint32_t ret;
-
-switch (dcrn) {
-case SDRAM0_CFGADDR:
-ret = sdram->addr;
-break;
-case SDRAM0_CFGDATA:
-switch (sdram->addr) {
-case 0x00: /* SDRAM_BESR0 */
-ret = sdram->besr0;
-break;
-case 0x08: /* SDRAM_BESR1 */
-ret = sdram->besr1;
-break;
-case 0x10: /* SDRAM_BEAR */
-ret = sdram->bear;
-break;
-case 0x20: /* SDRAM_CFG */
-ret = sdram->cfg;
-break;
-case 0x24: /* SDRAM_STATUS */
-ret = sdram->status;
-break;
-case 0x30: /* SDRAM_RTR */
-ret = sdram->rtr;
-

[PATCH v7 8/8] ppc4xx_sdram: Add errp parameter to ppc4xx_sdram_banks()

2022-10-19 Thread BALATON Zoltan
Do not exit from ppc4xx_sdram_banks() but report error via an errp
parameter instead.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Philippe Mathieu-Daudé 
---
 hw/ppc/ppc4xx_sdram.c | 28 +++-
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/hw/ppc/ppc4xx_sdram.c b/hw/ppc/ppc4xx_sdram.c
index 7c097efe20..8d7137faf3 100644
--- a/hw/ppc/ppc4xx_sdram.c
+++ b/hw/ppc/ppc4xx_sdram.c
@@ -52,10 +52,12 @@
  * must be one of a small set of sizes. The number of banks and the supported
  * sizes varies by SoC.
  */
-static void ppc4xx_sdram_banks(MemoryRegion *ram, int nr_banks,
+static bool ppc4xx_sdram_banks(MemoryRegion *ram, int nr_banks,
Ppc4xxSdramBank ram_banks[],
-   const ram_addr_t sdram_bank_sizes[])
+   const ram_addr_t sdram_bank_sizes[],
+   Error **errp)
 {
+ERRP_GUARD();
 ram_addr_t size_left = memory_region_size(ram);
 ram_addr_t base = 0;
 ram_addr_t bank_size;
@@ -93,14 +95,16 @@ static void ppc4xx_sdram_banks(MemoryRegion *ram, int 
nr_banks,
sdram_bank_sizes[i] / MiB,
sdram_bank_sizes[i + 1] ? ", " : "");
 }
-error_report("at most %d bank%s of %s MiB each supported",
- nr_banks, nr_banks == 1 ? "" : "s", s->str);
-error_printf("Possible valid RAM size: %" PRIi64 " MiB\n",
-used_size ? used_size / MiB : sdram_bank_sizes[i - 1] / MiB);
+error_setg(errp, "Invalid SDRAM banks");
+error_append_hint(errp, "at most %d bank%s of %s MiB each supported\n",
+  nr_banks, nr_banks == 1 ? "" : "s", s->str);
+error_append_hint(errp, "Possible valid RAM size: %" PRIi64 " MiB\n",
+  used_size ? used_size / MiB : sdram_bank_sizes[i - 1] / MiB);
 
 g_string_free(s, true);
-exit(EXIT_FAILURE);
+return false;
 }
+return true;
 }
 
 static void sdram_bank_map(Ppc4xxSdramBank *bank)
@@ -399,7 +403,10 @@ static void ppc4xx_sdram_ddr_realize(DeviceState *dev, 
Error **errp)
 error_setg(errp, "Missing dram memory region");
 return;
 }
-ppc4xx_sdram_banks(s->dram_mr, s->nbanks, s->bank, valid_bank_sizes);
+if (!ppc4xx_sdram_banks(s->dram_mr, s->nbanks, s->bank,
+valid_bank_sizes, errp)) {
+return;
+}
 for (i = 0; i < s->nbanks; i++) {
 if (s->bank[i].size) {
 s->bank[i].bcr = sdram_ddr_bcr(s->bank[i].base, s->bank[i].size);
@@ -666,7 +673,10 @@ static void ppc4xx_sdram_ddr2_realize(DeviceState *dev, 
Error **errp)
 error_setg(errp, "Missing dram memory region");
 return;
 }
-ppc4xx_sdram_banks(s->dram_mr, s->nbanks, s->bank, valid_bank_sizes);
+if (!ppc4xx_sdram_banks(s->dram_mr, s->nbanks, s->bank,
+valid_bank_sizes, errp)) {
+return;
+}
 for (i = 0; i < s->nbanks; i++) {
 if (s->bank[i].size) {
 s->bank[i].bcr = sdram_ddr2_bcr(s->bank[i].base, s->bank[i].size);
-- 
2.30.4




[PATCH v7 0/8] ppc4xx_sdram QOMify and clean ups

2022-10-19 Thread BALATON Zoltan
This is the end of the QOMify series originally started by Cédric,
rebased on master and now only including patches not yet merged. Patches
that still need review are 1-3 (these only move code to
ppc4xx_sdram.c) and 6-7 (these unify the DDR and DDR2 models to share code
where possible).

Regards,
BALATON Zoltan

v7: Rebase on master after merge of first part of the series
v6: Split patch moving sdram controller models together into smaller steps
v5: Add functions the enable sdram controller and call it from boards
v4: address more review comments
v3: Fix patches that got squashed during rebase
v2: address some review comments and try to avoid compile problem with
gcc 12.2 (untested)


BALATON Zoltan (8):
  ppc440_uc.c: Move DDR2 SDRAM controller model to ppc4xx_sdram.c
  ppc4xx_devs.c: Move DDR SDRAM controller model to ppc4xx_sdram.c
  ppc4xx_sdram: Move ppc4xx_sdram_banks() to ppc4xx_sdram.c
  ppc4xx_sdram: Use hwaddr for memory bank size
  ppc4xx_sdram: Rename local state variable for brevity
  ppc4xx_sdram: Generalise bank setup
  ppc4xx_sdram: Convert DDR SDRAM controller to new bank handling
  ppc4xx_sdram: Add errp parameter to ppc4xx_sdram_banks()

 hw/ppc/meson.build  |   3 +-
 hw/ppc/ppc440_uc.c  | 332 --
 hw/ppc/ppc4xx_devs.c| 414 --
 hw/ppc/ppc4xx_sdram.c   | 757 
 hw/ppc/trace-events |   1 +
 include/hw/ppc/ppc4xx.h |  20 +-
 6 files changed, 768 insertions(+), 759 deletions(-)
 create mode 100644 hw/ppc/ppc4xx_sdram.c

-- 
2.30.4




[PATCH v7 6/8] ppc4xx_sdram: Generalise bank setup

2022-10-19 Thread BALATON Zoltan
Currently only base and size are set on initial bank creation and the bcr
value is computed when mapping the region. Set bcr at init so that the bcr
encoding method becomes local to the controller model, and mapping and
unmapping can operate on the bank, so it can be shared between
different controller models. This patch converts the DDR2 controller.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc4xx_sdram.c | 91 ++-
 hw/ppc/trace-events   |  1 +
 2 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/hw/ppc/ppc4xx_sdram.c b/hw/ppc/ppc4xx_sdram.c
index 4bc53c8f01..63a33b8fd4 100644
--- a/hw/ppc/ppc4xx_sdram.c
+++ b/hw/ppc/ppc4xx_sdram.c
@@ -105,6 +105,7 @@ static void ppc4xx_sdram_banks(MemoryRegion *ram, int 
nr_banks,
 
 static void sdram_bank_map(Ppc4xxSdramBank *bank)
 {
+trace_ppc4xx_sdram_map(bank->base, bank->size);
 memory_region_init(>container, NULL, "sdram-container", bank->size);
 memory_region_add_subregion(>container, 0, >ram);
 memory_region_add_subregion(get_system_memory(), bank->base,
@@ -113,11 +114,26 @@ static void sdram_bank_map(Ppc4xxSdramBank *bank)
 
 static void sdram_bank_unmap(Ppc4xxSdramBank *bank)
 {
+trace_ppc4xx_sdram_unmap(bank->base, bank->size);
 memory_region_del_subregion(get_system_memory(), >container);
 memory_region_del_subregion(>container, >ram);
 object_unparent(OBJECT(>container));
 }
 
+static void sdram_bank_set_bcr(Ppc4xxSdramBank *bank, uint32_t bcr,
+   hwaddr base, hwaddr size, int enabled)
+{
+if (memory_region_is_mapped(>container)) {
+sdram_bank_unmap(bank);
+}
+bank->bcr = bcr;
+bank->base = base;
+bank->size = size;
+if (enabled && (bcr & 1)) {
+sdram_bank_map(bank);
+}
+}
+
 enum {
 SDRAM0_CFGADDR = 0x010,
 SDRAM0_CFGDATA = 0x011,
@@ -455,6 +471,8 @@ void ppc4xx_sdram_ddr_enable(Ppc4xxSdramDdrState *s)
 
 /*/
 /* DDR2 SDRAM controller */
+#define SDRAM_DDR2_BCR_MASK 0xffe0ffc1
+
 enum {
 SDRAM_R0BAS = 0x40,
 SDRAM_R1BAS,
@@ -528,48 +546,6 @@ static hwaddr sdram_ddr2_size(uint32_t bcr)
 return size;
 }
 
-static void sdram_ddr2_set_bcr(Ppc4xxSdramDdr2State *sdram, int i,
-   uint32_t bcr, int enabled)
-{
-if (sdram->bank[i].bcr & 1) {
-/* First unmap RAM if enabled */
-trace_ppc4xx_sdram_unmap(sdram_ddr2_base(sdram->bank[i].bcr),
- sdram_ddr2_size(sdram->bank[i].bcr));
-sdram_bank_unmap(>bank[i]);
-}
-sdram->bank[i].bcr = bcr & 0xffe0ffc1;
-if (enabled && (bcr & 1)) {
-trace_ppc4xx_sdram_map(sdram_ddr2_base(bcr), sdram_ddr2_size(bcr));
-sdram_bank_map(>bank[i]);
-}
-}
-
-static void sdram_ddr2_map_bcr(Ppc4xxSdramDdr2State *sdram)
-{
-int i;
-
-for (i = 0; i < sdram->nbanks; i++) {
-if (sdram->bank[i].size) {
-sdram_ddr2_set_bcr(sdram, i,
-   sdram_ddr2_bcr(sdram->bank[i].base,
-  sdram->bank[i].size), 1);
-} else {
-sdram_ddr2_set_bcr(sdram, i, 0, 0);
-}
-}
-}
-
-static void sdram_ddr2_unmap_bcr(Ppc4xxSdramDdr2State *sdram)
-{
-int i;
-
-for (i = 0; i < sdram->nbanks; i++) {
-if (sdram->bank[i].size) {
-sdram_ddr2_set_bcr(sdram, i, sdram->bank[i].bcr & ~1, 0);
-}
-}
-}
-
 static uint32_t sdram_ddr2_dcr_read(void *opaque, int dcrn)
 {
 Ppc4xxSdramDdr2State *s = opaque;
@@ -628,6 +604,7 @@ static uint32_t sdram_ddr2_dcr_read(void *opaque, int dcrn)
 static void sdram_ddr2_dcr_write(void *opaque, int dcrn, uint32_t val)
 {
 Ppc4xxSdramDdr2State *s = opaque;
+int i;
 
 switch (dcrn) {
 case SDRAM_R0BAS:
@@ -652,13 +629,25 @@ static void sdram_ddr2_dcr_write(void *opaque, int dcrn, 
uint32_t val)
 (val & SDRAM_DDR2_MCOPT2_DCEN)) {
 trace_ppc4xx_sdram_enable("enable");
 /* validate all RAM mappings */
-sdram_ddr2_map_bcr(s);
+for (i = 0; i < s->nbanks; i++) {
+if (s->bank[i].size) {
+sdram_bank_set_bcr(>bank[i], s->bank[i].bcr,
+   s->bank[i].base, s->bank[i].size,
+   1);
+}
+}
 s->mcopt2 |= SDRAM_DDR2_MCOPT2_DCEN;
 } else if ((s->mcopt2 & SDRAM_DDR2_MCOPT2_DCEN) &&
!(val & SDRAM_DDR2_MCOPT2_DCEN)) {
 trace_ppc4xx_sdram_enable("disable");
 /* invalidate all RAM mappings */
-sdram_ddr2_unmap_bcr(s);
+for (i = 0; i < s->nbanks; i++) {
+if (s->bank[i].size) {
+sdram_bank_set_bcr(>bank[i], s->bank[i].bcr,
+   

[PATCH v7 3/8] ppc4xx_sdram: Move ppc4xx_sdram_banks() to ppc4xx_sdram.c

2022-10-19 Thread BALATON Zoltan
This function is only used by the ppc4xx memory controller models so
it can be made static.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc4xx_devs.c| 62 -
 hw/ppc/ppc4xx_sdram.c   | 61 
 include/hw/ppc/ppc4xx.h | 20 ++---
 3 files changed, 69 insertions(+), 74 deletions(-)

diff --git a/hw/ppc/ppc4xx_devs.c b/hw/ppc/ppc4xx_devs.c
index f737dbb3d6..c1d111465d 100644
--- a/hw/ppc/ppc4xx_devs.c
+++ b/hw/ppc/ppc4xx_devs.c
@@ -23,73 +23,11 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/units.h"
 #include "cpu.h"
 #include "hw/ppc/ppc4xx.h"
 #include "hw/qdev-properties.h"
 #include "qapi/error.h"
 
-/*
- * Split RAM between SDRAM banks.
- *
- * sdram_bank_sizes[] must be in descending order, that is sizes[i] > 
sizes[i+1]
- * and must be 0-terminated.
- *
- * The 4xx SDRAM controller supports a small number of banks, and each bank
- * must be one of a small set of sizes. The number of banks and the supported
- * sizes varies by SoC.
- */
-void ppc4xx_sdram_banks(MemoryRegion *ram, int nr_banks,
-Ppc4xxSdramBank ram_banks[],
-const ram_addr_t sdram_bank_sizes[])
-{
-ram_addr_t size_left = memory_region_size(ram);
-ram_addr_t base = 0;
-ram_addr_t bank_size;
-int i;
-int j;
-
-for (i = 0; i < nr_banks; i++) {
-for (j = 0; sdram_bank_sizes[j] != 0; j++) {
-bank_size = sdram_bank_sizes[j];
-if (bank_size <= size_left) {
-char name[32];
-
-ram_banks[i].base = base;
-ram_banks[i].size = bank_size;
-base += bank_size;
-size_left -= bank_size;
-snprintf(name, sizeof(name), "ppc4xx.sdram%d", i);
-memory_region_init_alias(_banks[i].ram, NULL, name, ram,
- ram_banks[i].base, ram_banks[i].size);
-break;
-}
-}
-if (!size_left) {
-/* No need to use the remaining banks. */
-break;
-}
-}
-
-if (size_left) {
-ram_addr_t used_size = memory_region_size(ram) - size_left;
-GString *s = g_string_new(NULL);
-
-for (i = 0; sdram_bank_sizes[i]; i++) {
-g_string_append_printf(s, "%" PRIi64 "%s",
-   sdram_bank_sizes[i] / MiB,
-   sdram_bank_sizes[i + 1] ? ", " : "");
-}
-error_report("at most %d bank%s of %s MiB each supported",
- nr_banks, nr_banks == 1 ? "" : "s", s->str);
-error_printf("Possible valid RAM size: %" PRIi64 " MiB\n",
-used_size ? used_size / MiB : sdram_bank_sizes[i - 1] / MiB);
-
-g_string_free(s, true);
-exit(EXIT_FAILURE);
-}
-}
-
 /*/
 /* MAL */
 
diff --git a/hw/ppc/ppc4xx_sdram.c b/hw/ppc/ppc4xx_sdram.c
index d88363bc3d..62ef7d8f0d 100644
--- a/hw/ppc/ppc4xx_sdram.c
+++ b/hw/ppc/ppc4xx_sdram.c
@@ -43,6 +43,67 @@
 /*/
 /* Shared functions */
 
+/*
+ * Split RAM between SDRAM banks.
+ *
+ * sdram_bank_sizes[] must be in descending order, that is sizes[i] > 
sizes[i+1]
+ * and must be 0-terminated.
+ *
+ * The 4xx SDRAM controller supports a small number of banks, and each bank
+ * must be one of a small set of sizes. The number of banks and the supported
+ * sizes varies by SoC.
+ */
+static void ppc4xx_sdram_banks(MemoryRegion *ram, int nr_banks,
+   Ppc4xxSdramBank ram_banks[],
+   const ram_addr_t sdram_bank_sizes[])
+{
+ram_addr_t size_left = memory_region_size(ram);
+ram_addr_t base = 0;
+ram_addr_t bank_size;
+int i;
+int j;
+
+for (i = 0; i < nr_banks; i++) {
+for (j = 0; sdram_bank_sizes[j] != 0; j++) {
+bank_size = sdram_bank_sizes[j];
+if (bank_size <= size_left) {
+char name[32];
+
+ram_banks[i].base = base;
+ram_banks[i].size = bank_size;
+base += bank_size;
+size_left -= bank_size;
+snprintf(name, sizeof(name), "ppc4xx.sdram%d", i);
+memory_region_init_alias(_banks[i].ram, NULL, name, ram,
+ ram_banks[i].base, ram_banks[i].size);
+break;
+}
+}
+if (!size_left) {
+/* No need to use the remaining banks. */
+break;
+}
+}
+
+if (size_left) {
+ram_addr_t used_size = memory_region_size(ram) - size_left;
+GString *s = g_string_new(NULL);
+
+for (i = 0; sdram_bank_sizes[i]; i++) {
+g_string_append_printf(s, "%" PRIi64 "%s",
+   sdram_bank_sizes[i] / 
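
As an aside, the greedy split performed by ppc4xx_sdram_banks() above is
easy to see with a tiny standalone program (the bank count and size list
here are only an assumption for illustration, not taken from a real SoC):

#include <stdio.h>

#define MiB (1024ULL * 1024)

int main(void)
{
    /* assumed: 4 banks, valid sizes in descending order, 0-terminated */
    const unsigned long long sizes[] = { 256 * MiB, 128 * MiB, 64 * MiB,
                                         32 * MiB, 16 * MiB, 8 * MiB, 0 };
    unsigned long long left = 144 * MiB, base = 0;
    int i, j, nr_banks = 4;

    for (i = 0; i < nr_banks && left; i++) {
        for (j = 0; sizes[j]; j++) {
            if (sizes[j] <= left) {
                printf("bank%d: base=%llu MiB size=%llu MiB\n",
                       i, base / MiB, sizes[j] / MiB);
                base += sizes[j];
                left -= sizes[j];
                break;
            }
        }
    }
    /* prints: bank0: base=0 MiB size=128 MiB
     *         bank1: base=128 MiB size=16 MiB */
    return 0;
}

Unlike this sketch, the real helper reports an error when some RAM is left
over that no valid bank size can cover.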

[PATCH v7 5/8] ppc4xx_sdram: Rename local state variable for brevity

2022-10-19 Thread BALATON Zoltan
Rename the sdram local state variable to s in dcr read/write functions
and reset methods for better readability and to match realize methods.
Other places not converted will be changed or removed in subsequent
patches.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Philippe Mathieu-Daudé 
---
 hw/ppc/ppc4xx_sdram.c | 158 +-
 1 file changed, 79 insertions(+), 79 deletions(-)

diff --git a/hw/ppc/ppc4xx_sdram.c b/hw/ppc/ppc4xx_sdram.c
index 2294747594..4bc53c8f01 100644
--- a/hw/ppc/ppc4xx_sdram.c
+++ b/hw/ppc/ppc4xx_sdram.c
@@ -237,56 +237,56 @@ static void sdram_ddr_unmap_bcr(Ppc4xxSdramDdrState 
*sdram)
 
 static uint32_t sdram_ddr_dcr_read(void *opaque, int dcrn)
 {
-Ppc4xxSdramDdrState *sdram = opaque;
+Ppc4xxSdramDdrState *s = opaque;
 uint32_t ret;
 
 switch (dcrn) {
 case SDRAM0_CFGADDR:
-ret = sdram->addr;
+ret = s->addr;
 break;
 case SDRAM0_CFGDATA:
-switch (sdram->addr) {
+switch (s->addr) {
 case 0x00: /* SDRAM_BESR0 */
-ret = sdram->besr0;
+ret = s->besr0;
 break;
 case 0x08: /* SDRAM_BESR1 */
-ret = sdram->besr1;
+ret = s->besr1;
 break;
 case 0x10: /* SDRAM_BEAR */
-ret = sdram->bear;
+ret = s->bear;
 break;
 case 0x20: /* SDRAM_CFG */
-ret = sdram->cfg;
+ret = s->cfg;
 break;
 case 0x24: /* SDRAM_STATUS */
-ret = sdram->status;
+ret = s->status;
 break;
 case 0x30: /* SDRAM_RTR */
-ret = sdram->rtr;
+ret = s->rtr;
 break;
 case 0x34: /* SDRAM_PMIT */
-ret = sdram->pmit;
+ret = s->pmit;
 break;
 case 0x40: /* SDRAM_B0CR */
-ret = sdram->bank[0].bcr;
+ret = s->bank[0].bcr;
 break;
 case 0x44: /* SDRAM_B1CR */
-ret = sdram->bank[1].bcr;
+ret = s->bank[1].bcr;
 break;
 case 0x48: /* SDRAM_B2CR */
-ret = sdram->bank[2].bcr;
+ret = s->bank[2].bcr;
 break;
 case 0x4C: /* SDRAM_B3CR */
-ret = sdram->bank[3].bcr;
+ret = s->bank[3].bcr;
 break;
 case 0x80: /* SDRAM_TR */
 ret = -1; /* ? */
 break;
 case 0x94: /* SDRAM_ECCCFG */
-ret = sdram->ecccfg;
+ret = s->ecccfg;
 break;
 case 0x98: /* SDRAM_ECCESR */
-ret = sdram->eccesr;
+ret = s->eccesr;
 break;
 default: /* Error */
 ret = -1;
@@ -304,78 +304,78 @@ static uint32_t sdram_ddr_dcr_read(void *opaque, int dcrn)
 
 static void sdram_ddr_dcr_write(void *opaque, int dcrn, uint32_t val)
 {
-Ppc4xxSdramDdrState *sdram = opaque;
+Ppc4xxSdramDdrState *s = opaque;
 
 switch (dcrn) {
 case SDRAM0_CFGADDR:
-sdram->addr = val;
+s->addr = val;
 break;
 case SDRAM0_CFGDATA:
-switch (sdram->addr) {
+switch (s->addr) {
 case 0x00: /* SDRAM_BESR0 */
-sdram->besr0 &= ~val;
+s->besr0 &= ~val;
 break;
 case 0x08: /* SDRAM_BESR1 */
-sdram->besr1 &= ~val;
+s->besr1 &= ~val;
 break;
 case 0x10: /* SDRAM_BEAR */
-sdram->bear = val;
+s->bear = val;
 break;
 case 0x20: /* SDRAM_CFG */
 val &= 0xFFE0;
-if (!(sdram->cfg & 0x8000) && (val & 0x8000)) {
+if (!(s->cfg & 0x8000) && (val & 0x8000)) {
 trace_ppc4xx_sdram_enable("enable");
 /* validate all RAM mappings */
-sdram_ddr_map_bcr(sdram);
-sdram->status &= ~0x8000;
-} else if ((sdram->cfg & 0x8000) && !(val & 0x8000)) {
+sdram_ddr_map_bcr(s);
+s->status &= ~0x8000;
+} else if ((s->cfg & 0x8000) && !(val & 0x8000)) {
 trace_ppc4xx_sdram_enable("disable");
 /* invalidate all RAM mappings */
-sdram_ddr_unmap_bcr(sdram);
-sdram->status |= 0x8000;
+sdram_ddr_unmap_bcr(s);
+s->status |= 0x8000;
 }
-if (!(sdram->cfg & 0x4000) && (val & 0x4000)) {
-sdram->status |= 0x4000;
-} else if ((sdram->cfg & 0x4000) && !(val & 0x4000)) {
-sdram->status &= ~0x4000;
+if (!(s->cfg & 0x4000) && (val & 0x4000)) {
+s->status |= 0x4000;
+} else if ((s->cfg & 0x4000) && !(val & 0x4000)) {
+s->status &= ~0x4000;
 }
-sdram->cfg = val;
+

[PATCH v7 1/8] ppc440_uc.c: Move DDR2 SDRAM controller model to ppc4xx_sdram.c

2022-10-19 Thread BALATON Zoltan
In order to move the PPC4xx SDRAM controller models together, move the
DDR2 controller model out of ppc440_uc.c into a new ppc4xx_sdram.c file.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/meson.build|   3 +-
 hw/ppc/ppc440_uc.c| 332 
 hw/ppc/ppc4xx_sdram.c | 348 ++
 3 files changed, 350 insertions(+), 333 deletions(-)
 create mode 100644 hw/ppc/ppc4xx_sdram.c

diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build
index 32babc9b48..c927337da0 100644
--- a/hw/ppc/meson.build
+++ b/hw/ppc/meson.build
@@ -59,8 +59,9 @@ ppc_ss.add(when: 'CONFIG_PPC440', if_true: files(
   'ppc440_bamboo.c',
   'ppc440_pcix.c', 'ppc440_uc.c'))
 ppc_ss.add(when: 'CONFIG_PPC4XX', if_true: files(
+  'ppc4xx_devs.c',
   'ppc4xx_pci.c',
-  'ppc4xx_devs.c'))
+  'ppc4xx_sdram.c'))
 ppc_ss.add(when: 'CONFIG_SAM460EX', if_true: files('sam460ex.c'))
 # PReP
 ppc_ss.add(when: 'CONFIG_PREP', if_true: files('prep.c'))
diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c
index 5fbf44009e..651263926e 100644
--- a/hw/ppc/ppc440_uc.c
+++ b/hw/ppc/ppc440_uc.c
@@ -10,21 +10,14 @@
 
 #include "qemu/osdep.h"
 #include "qemu/units.h"
-#include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qemu/log.h"
-#include "qemu/module.h"
 #include "hw/irq.h"
-#include "exec/memory.h"
-#include "cpu.h"
 #include "hw/ppc/ppc4xx.h"
 #include "hw/qdev-properties.h"
 #include "hw/pci/pci.h"
-#include "sysemu/block-backend.h"
 #include "sysemu/reset.h"
 #include "ppc440.h"
-#include "qom/object.h"
-#include "trace.h"
 
 /*/
 /* L2 Cache as SRAM */
@@ -478,331 +471,6 @@ void ppc4xx_sdr_init(CPUPPCState *env)
  sdr, _read_sdr, _write_sdr);
 }
 
-/*/
-/* SDRAM controller */
-enum {
-SDRAM0_CFGADDR = 0x10,
-SDRAM0_CFGDATA,
-SDRAM_R0BAS = 0x40,
-SDRAM_R1BAS,
-SDRAM_R2BAS,
-SDRAM_R3BAS,
-SDRAM_CONF1HB = 0x45,
-SDRAM_PLBADDULL = 0x4a,
-SDRAM_CONF1LL = 0x4b,
-SDRAM_CONFPATHB = 0x4f,
-SDRAM_PLBADDUHB = 0x50,
-};
-
-static uint32_t sdram_ddr2_bcr(hwaddr ram_base, hwaddr ram_size)
-{
-uint32_t bcr;
-
-switch (ram_size) {
-case 8 * MiB:
-bcr = 0xffc0;
-break;
-case 16 * MiB:
-bcr = 0xff80;
-break;
-case 32 * MiB:
-bcr = 0xff00;
-break;
-case 64 * MiB:
-bcr = 0xfe00;
-break;
-case 128 * MiB:
-bcr = 0xfc00;
-break;
-case 256 * MiB:
-bcr = 0xf800;
-break;
-case 512 * MiB:
-bcr = 0xf000;
-break;
-case 1 * GiB:
-bcr = 0xe000;
-break;
-case 2 * GiB:
-bcr = 0xc000;
-break;
-case 4 * GiB:
-bcr = 0x8000;
-break;
-default:
-error_report("invalid RAM size " TARGET_FMT_plx, ram_size);
-return 0;
-}
-bcr |= ram_base >> 2 & 0xffe0;
-bcr |= 1;
-
-return bcr;
-}
-
-static inline hwaddr sdram_ddr2_base(uint32_t bcr)
-{
-return (bcr & 0xffe0) << 2;
-}
-
-static uint64_t sdram_ddr2_size(uint32_t bcr)
-{
-uint64_t size;
-int sh;
-
-sh = 1024 - ((bcr >> 6) & 0x3ff);
-size = 8 * MiB * sh;
-
-return size;
-}
-
-static void sdram_bank_map(Ppc4xxSdramBank *bank)
-{
-memory_region_init(>container, NULL, "sdram-container", bank->size);
-memory_region_add_subregion(>container, 0, >ram);
-memory_region_add_subregion(get_system_memory(), bank->base,
->container);
-}
-
-static void sdram_bank_unmap(Ppc4xxSdramBank *bank)
-{
-memory_region_del_subregion(get_system_memory(), >container);
-memory_region_del_subregion(>container, >ram);
-object_unparent(OBJECT(>container));
-}
-
-static void sdram_ddr2_set_bcr(Ppc4xxSdramDdr2State *sdram, int i,
-   uint32_t bcr, int enabled)
-{
-if (sdram->bank[i].bcr & 1) {
-/* First unmap RAM if enabled */
-trace_ppc4xx_sdram_unmap(sdram_ddr2_base(sdram->bank[i].bcr),
- sdram_ddr2_size(sdram->bank[i].bcr));
-sdram_bank_unmap(>bank[i]);
-}
-sdram->bank[i].bcr = bcr & 0xffe0ffc1;
-if (enabled && (bcr & 1)) {
-trace_ppc4xx_sdram_map(sdram_ddr2_base(bcr), sdram_ddr2_size(bcr));
-sdram_bank_map(>bank[i]);
-}
-}
-
-static void sdram_ddr2_map_bcr(Ppc4xxSdramDdr2State *sdram)
-{
-int i;
-
-for (i = 0; i < sdram->nbanks; i++) {
-if (sdram->bank[i].size) {
-sdram_ddr2_set_bcr(sdram, i,
-   sdram_ddr2_bcr(sdram->bank[i].base,
-  sdram->bank[i].size), 1);
-} else {
-sdram_ddr2_set_bcr(sdram, i, 0, 0);
-}
-}
-}
-
-static void sdram_ddr2_unmap_bcr(Ppc4xxSdramDdr2State *sdram)
-{
-int i;
-
-for (i = 0; i 

[PATCH v7 7/8] ppc4xx_sdram: Convert DDR SDRAM controller to new bank handling

2022-10-19 Thread BALATON Zoltan
Use the generic bank handling introduced in the previous patch in the
DDR SDRAM controller too. This also fixes the previously broken region
unmap: sdram_ddr_unmap_bcr() ignored the container region, so it
crashed with an assert when the guest tried to disable the controller.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc4xx_sdram.c | 98 ---
 1 file changed, 37 insertions(+), 61 deletions(-)

diff --git a/hw/ppc/ppc4xx_sdram.c b/hw/ppc/ppc4xx_sdram.c
index 63a33b8fd4..7c097efe20 100644
--- a/hw/ppc/ppc4xx_sdram.c
+++ b/hw/ppc/ppc4xx_sdram.c
@@ -141,6 +141,8 @@ enum {
 
 /*/
 /* DDR SDRAM controller */
+#define SDRAM_DDR_BCR_MASK 0xFFDEE001
+
 static uint32_t sdram_ddr_bcr(hwaddr ram_base, hwaddr ram_size)
 {
 uint32_t bcr;
@@ -199,58 +201,6 @@ static hwaddr sdram_ddr_size(uint32_t bcr)
 return size;
 }
 
-static void sdram_ddr_set_bcr(Ppc4xxSdramDdrState *sdram, int i,
-  uint32_t bcr, int enabled)
-{
-if (sdram->bank[i].bcr & 1) {
-/* Unmap RAM */
-trace_ppc4xx_sdram_unmap(sdram_ddr_base(sdram->bank[i].bcr),
- sdram_ddr_size(sdram->bank[i].bcr));
-memory_region_del_subregion(get_system_memory(),
->bank[i].container);
-memory_region_del_subregion(>bank[i].container,
->bank[i].ram);
-object_unparent(OBJECT(>bank[i].container));
-}
-sdram->bank[i].bcr = bcr & 0xFFDEE001;
-if (enabled && (bcr & 1)) {
-trace_ppc4xx_sdram_map(sdram_ddr_base(bcr), sdram_ddr_size(bcr));
-memory_region_init(>bank[i].container, NULL, "sdram-container",
-   sdram_ddr_size(bcr));
-memory_region_add_subregion(>bank[i].container, 0,
->bank[i].ram);
-memory_region_add_subregion(get_system_memory(),
-sdram_ddr_base(bcr),
->bank[i].container);
-}
-}
-
-static void sdram_ddr_map_bcr(Ppc4xxSdramDdrState *sdram)
-{
-int i;
-
-for (i = 0; i < sdram->nbanks; i++) {
-if (sdram->bank[i].size != 0) {
-sdram_ddr_set_bcr(sdram, i, sdram_ddr_bcr(sdram->bank[i].base,
-  sdram->bank[i].size), 1);
-} else {
-sdram_ddr_set_bcr(sdram, i, 0, 0);
-}
-}
-}
-
-static void sdram_ddr_unmap_bcr(Ppc4xxSdramDdrState *sdram)
-{
-int i;
-
-for (i = 0; i < sdram->nbanks; i++) {
-trace_ppc4xx_sdram_unmap(sdram_ddr_base(sdram->bank[i].bcr),
- sdram_ddr_size(sdram->bank[i].bcr));
-memory_region_del_subregion(get_system_memory(),
->bank[i].ram);
-}
-}
-
 static uint32_t sdram_ddr_dcr_read(void *opaque, int dcrn)
 {
 Ppc4xxSdramDdrState *s = opaque;
@@ -321,6 +271,7 @@ static uint32_t sdram_ddr_dcr_read(void *opaque, int dcrn)
 static void sdram_ddr_dcr_write(void *opaque, int dcrn, uint32_t val)
 {
 Ppc4xxSdramDdrState *s = opaque;
+int i;
 
 switch (dcrn) {
 case SDRAM0_CFGADDR:
@@ -342,12 +293,24 @@ static void sdram_ddr_dcr_write(void *opaque, int dcrn, 
uint32_t val)
 if (!(s->cfg & 0x8000) && (val & 0x8000)) {
 trace_ppc4xx_sdram_enable("enable");
 /* validate all RAM mappings */
-sdram_ddr_map_bcr(s);
+for (i = 0; i < s->nbanks; i++) {
+if (s->bank[i].size) {
+sdram_bank_set_bcr(>bank[i], s->bank[i].bcr,
+   s->bank[i].base, s->bank[i].size,
+   1);
+}
+}
 s->status &= ~0x8000;
 } else if ((s->cfg & 0x8000) && !(val & 0x8000)) {
 trace_ppc4xx_sdram_enable("disable");
 /* invalidate all RAM mappings */
-sdram_ddr_unmap_bcr(s);
+for (i = 0; i < s->nbanks; i++) {
+if (s->bank[i].size) {
+sdram_bank_set_bcr(>bank[i], s->bank[i].bcr,
+   s->bank[i].base, s->bank[i].size,
+   0);
+}
+}
 s->status |= 0x8000;
 }
 if (!(s->cfg & 0x4000) && (val & 0x4000)) {
@@ -367,16 +330,16 @@ static void sdram_ddr_dcr_write(void *opaque, int dcrn, 
uint32_t val)
 s->pmit = (val & 0xF800) | 0x07C0;
 break;
 case 0x40: /* SDRAM_B0CR */
-sdram_ddr_set_bcr(s, 0, val, s->cfg & 0x8000);
-break;
 case 0x44: /* SDRAM_B1CR */
-sdram_ddr_set_bcr(s, 1, val, s->cfg & 

Re: [PATCH v1 04/12] hw/i386/xen/xen-hvm: move x86-specific fields out of XenIOState

2022-10-19 Thread Paul Durrant

On 15/10/2022 06:07, Vikram Garhwal wrote:

From: Stefano Stabellini 

In preparation for moving most of the xen-hvm code to an arch-neutral location, move:
- shared_vmport_page
- log_for_dirtybit
- dirty_bitmap
- suspend
- wakeup

out of XenIOState struct as these are only used on x86, especially the ones
related to dirty logging.
Updated XenIOState can be used for both aarch64 and x86.

Also, remove free_phys_offset as it was unused.

Signed-off-by: Stefano Stabellini 
Signed-off-by: Vikram Garhwal 


Reviewed-by: Paul Durrant 




Re: [PATCH v2 1/2] util/log: Derive thread id from getpid() on hosts w/o gettid() syscall

2022-10-19 Thread Daniel P . Berrangé
On Wed, Oct 19, 2022 at 05:16:50PM +0200, Greg Kurz wrote:
> A subsequent patch needs to be able to differentiate the main QEMU
> thread from other threads. An obvious way to do so is to compare
> log_thread_id() and getpid(), based on the fact that they are equal
> for the main thread on systems that have the gettid() syscall (e.g.
> linux).
> 
> Adapt the fallback code for systems without gettid() to provide the
> same assumption.
> 
> Suggested-by: Paolo Bonzini 
> Signed-off-by: Greg Kurz 
> ---
>  util/log.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/util/log.c b/util/log.c
> index d6eb0378c3a3..e1c2535cfcd2 100644
> --- a/util/log.c
> +++ b/util/log.c
> @@ -72,8 +72,13 @@ static int log_thread_id(void)
>  #elif defined(SYS_gettid)
>  return syscall(SYS_gettid);
>  #else
> +static __thread int my_id = -1;
>  static int counter;
> -return qatomic_fetch_inc();
> +
> +if (my_id == -1) {
> +my_id = getpid() + qatomic_fetch_inc();
> +}
> +return my_id;

This doesn't look safe for linux-user when we fork, but don't exec.

The getpid() will change after the fork, but counter won't be
reset, so a thread in the parent could clash with a thread
in the forked child.

I feel like if we want to check for the main thread, we should
be using pthread_self(), and compare result against the value
cached from main. Or cache in a __constructor__ function in
log.c to keep it isolated from main().
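
(For illustration, a minimal sketch of the constructor-cached pthread_self()
comparison described above; the helper names are hypothetical, not an existing
QEMU API.)

#include <pthread.h>
#include <stdbool.h>

/* Cached once, in the initial (main) thread, before main() runs. */
static pthread_t log_main_thread;

static void __attribute__((constructor)) log_cache_main_thread(void)
{
    log_main_thread = pthread_self();
}

static bool log_is_main_thread(void)
{
    /* pthread_t is opaque, so compare with pthread_equal(), not '=='. */
    return pthread_equal(pthread_self(), log_main_thread);
}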


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|





RE: [RFC 7/7] migration: call qemu_savevm_state_pending_exact() with the guest stopped

2022-10-19 Thread Yishai Hadas
> From: Qemu-devel  bounces+yishaih=nvidia@nongnu.org> On Behalf Of Jason Gunthorpe
> Sent: Tuesday, 18 October 2022 15:23
> To: Joao Martins 
> Cc: quint...@redhat.com; Alex Williamson ;
> Eric Blake ; Stefan Hajnoczi ;
> Fam Zheng ; qemu-s3...@nongnu.org; Cornelia Huck
> ; Thomas Huth ; Vladimir
> Sementsov-Ogievskiy ; Laurent Vivier
> ; John Snow ; Dr. David Alan
> Gilbert ; Christian Borntraeger
> ; Halil Pasic ; Paolo
> Bonzini ; qemu-bl...@nongnu.org; Eric Farman
> ; Richard Henderson
> ; David Hildenbrand ;
> Avihai Horon ; qemu-devel@nongnu.org
> Subject: Re: [RFC 7/7] migration: call qemu_savevm_state_pending_exact()
> with the guest stopped
> 
> On Fri, Oct 14, 2022 at 01:29:51PM +0100, Joao Martins wrote:
> > On 14/10/2022 12:28, Juan Quintela wrote:
> > > Joao Martins  wrote:
> > >> On 13/10/2022 17:08, Juan Quintela wrote:
> > >>> Oops.  My understanding was that once the guest is stopped you can
> > >>> say how big is it.
> > >
> > > Hi
> > >
> > >> It's worth keeping in mind that conceptually a VF won't stop (e.g.
> > >> DMA) until really told through VFIO. So, stopping CPUs (guest) just
> > >> means that guest code does not arm the VF for more I/O but still is
> > >> a weak guarantee as VF still has outstanding I/O to deal with until
> > >> VFIO tells it to quiesce DMA (for devices that support it).
> > >
> > > How can we make sure about that?
> > >
> > > i.e. I know I have a vfio device.  I need two things:
> > > - in the iterative stage, I eed to check the size, but a estimate is ok.
> > >   for example with RAM, we use whatever is the size of the dirty bitmap
> > >   at that moment.
> > >   If the estimated size is smaller than the theshold, we
> > >* stop the guest
> > >* sync dirty bitmap
> > >* now we test the (real) dirty bitmap size
> > >
> > > How can we do something like that with a vfio device.
> > >
> > You would have an extra intermediate step that stops the VF prior to
> > asking the device state length. What I am not sure is whether stopping
> > vCPUs can be skipped as an optimization.
> 
> It cannot, if you want to stop the VFIO device you must also stop the vCPUs
> because the device is not required to respond properly to MMIO operations
> when stopped.
> 
> > > My understanding from NVidia folks was that newer firmware have an
> > > ioctl to return than information.
> >
> > Maybe there's something new. I was thinking from this here:
> 
> Juan is talking about the ioctl we had in the pre-copy series.
> 
> I expect it to come into some different form to support this RFC.
> 

Do we really need to STOP the VM to get the exact data length that will be
required to complete stop copy?

Can't we simply go with a close estimate while the device is running, and
drop all the complexity in QEMU/kernel of STOPping and then RE-STARTing the
VM if the threshold isn't met, etc.?

Yishai


Re: [PATCH v10 6/9] s390x/cpu topology: add topology-disable machine property

2022-10-19 Thread Pierre Morel




On 10/19/22 11:03, Cornelia Huck wrote:

On Tue, Oct 18 2022, Cédric Le Goater  wrote:


On 10/12/22 18:21, Pierre Morel wrote:

S390 CPU topology is only allowed for s390-virtio-ccw-7.3 and
newer S390 machines.
We keep the possibility to disable the topology on these newer
machines with the property topology-disable.

Signed-off-by: Pierre Morel 
---
   include/hw/boards.h|  3 ++
   include/hw/s390x/cpu-topology.h| 18 +-
   include/hw/s390x/s390-virtio-ccw.h |  2 ++
   hw/core/machine.c  |  5 +++
   hw/s390x/s390-virtio-ccw.c | 53 +-
   util/qemu-config.c |  4 +++
   qemu-options.hx|  6 +++-
   7 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/include/hw/boards.h b/include/hw/boards.h
index 311ed17e18..67147c47bf 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -379,6 +379,9 @@ struct MachineState {
   } \
   type_init(machine_initfn##_register_types)
   
+extern GlobalProperty hw_compat_7_2[];

+extern const size_t hw_compat_7_2_len;


QEMU 7.2 is not out yet.


Yes, and the introduction of the new compat machines needs to go into a
separate patch. I'm usually preparing that patch while QEMU is in
freeze, but feel free to cook up a patch earlier if you need it.


OK, thanks, I understand. I put it in a separate file so it can be
adapted when the series needs to be merged.


Regards,
Pierre

--
Pierre Morel
IBM Lab Boeblingen



Re: [PATCH v5 03/10] acpi/tests/avocado/bits: disable acpi PSS tests that are failing in biosbits

2022-10-19 Thread Ani Sinha
On Wed, Oct 19, 2022 at 9:01 PM Alex Bennée  wrote:
>
>
> Ani Sinha  writes:
>
> > PSS tests in acpi test suite seems to be failing in biosbits. This is 
> > because
> > the test is unable to find PSS support in QEMU bios. Let us disable
> > them for now so that make check does not fail. We can fix the tests and
> > re-enable them later.
> >
> > Example failure:
> >
> >  ACPI _PSS (Pstate) table conformance tests 
> > [assert] _PSS must exist FAIL
> >   \_SB_.CPUS.C000
> >   No _PSS exists
> > Summary: 1 passed, 1 failed
> >  ACPI _PSS (Pstate) runtime tests 
> > [assert] _PSS must exist FAIL
> >   \_SB_.CPUS.C000
> >   No _PSS exists
> > Summary: 0 passed, 1 failed
> >
> > Cc: Daniel P. Berrangé 
> > Cc: Paolo Bonzini 
> > Cc: Maydell Peter 
> > Cc: John Snow 
> > Cc: Thomas Huth 
> > Cc: Alex Bennée 
> > Cc: Igor Mammedov 
> > Cc: Michael Tsirkin 
> > Signed-off-by: Ani Sinha 
> > ---
> >  tests/avocado/acpi-bits/bits-tests/testacpi.py2 | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/tests/avocado/acpi-bits/bits-tests/testacpi.py2 
> > b/tests/avocado/acpi-bits/bits-tests/testacpi.py2
> > index 18dc818d62..f818a9cce6 100644
> > --- a/tests/avocado/acpi-bits/bits-tests/testacpi.py2
> > +++ b/tests/avocado/acpi-bits/bits-tests/testacpi.py2
> > @@ -40,8 +40,8 @@ import time
> >
> >  def register_tests():
> >  testsuite.add_test("ACPI _MAT (Multiple APIC Table Entry) under 
> > Processor objects", test_mat, submenu="ACPI Tests")
> > -testsuite.add_test("ACPI _PSS (Pstate) table conformance tests", 
> > test_pss, submenu="ACPI Tests")
> > -testsuite.add_test("ACPI _PSS (Pstate) runtime tests", test_pstates, 
> > submenu="ACPI Tests")
> > +#testsuite.add_test("ACPI _PSS (Pstate) table conformance tests", 
> > test_pss, submenu="ACPI Tests")
> > +#testsuite.add_test("ACPI _PSS (Pstate) runtime tests", test_pstates, 
> > submenu="ACPI Tests")
> >  testsuite.add_test("ACPI DSDT (Differentiated System Description 
> > Table)", test_dsdt, submenu="ACPI Tests")
> >  testsuite.add_test("ACPI FACP (Fixed ACPI Description Table)", 
> > test_facp, submenu="ACPI Tests")
> >  testsuite.add_test("ACPI HPET (High Precision Event Timer Table)", 
> > test_hpet, submenu="ACPI Tests")
>
> I think this breaks bisection so should probably be included in the
> commit that adds the test, with a comment in the commit message.

Ah I see. The addition and the disabling have to be atomic so that
there are no test failures if the addition patch is cherry-picked but not
the patch that disables the tests.
Hmm.



Re: [PATCH v2 03/11] migration: Make migration json writer part of MigrationState struct

2022-10-19 Thread Nikolay Borisov




On 18.10.22 г. 13:06 ч., Daniel P. Berrangé wrote:

On Mon, Oct 10, 2022 at 04:34:00PM +0300, Nikolay Borisov wrote:

This is required so that migration stream configuration is written
to the migration stream. This would allow analyze-migration to
parse enabled capabilities for the migration and adjust its behavior
accordingly. This is in preparation for analyze-migration.py to support
'fixed-ram' capability format changes.

Signed-off-by: Nikolay Borisov 
---
  migration/migration.c |  5 +
  migration/migration.h |  3 +++
  migration/savevm.c| 38 ++
  3 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 140b0f1a54bd..d0779bbaf862 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1896,6 +1896,8 @@ static void migrate_fd_cleanup(MigrationState *s)
  g_free(s->hostname);
  s->hostname = NULL;
  
+json_writer_free(s->vmdesc);

+
  qemu_savevm_state_cleanup();
  
  if (s->to_dst_file) {

@@ -2154,6 +2156,7 @@ void migrate_init(MigrationState *s)
  error_free(s->error);
  s->error = NULL;
  s->hostname = NULL;
+s->vmdesc = NULL;
  
  migrate_set_state(>state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
  
@@ -4269,6 +4272,8 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)

  return;
  }
  
+s->vmdesc = json_writer_new(false);

+
  if (multifd_save_setup(_err) != 0) {
  error_report_err(local_err);
  migrate_set_state(>state, MIGRATION_STATUS_SETUP,
diff --git a/migration/migration.h b/migration/migration.h
index cdad8aceaaab..96f27aba2210 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -17,6 +17,7 @@
  #include "exec/cpu-common.h"
  #include "hw/qdev-core.h"
  #include "qapi/qapi-types-migration.h"
+#include "qapi/qmp/json-writer.h"
  #include "qemu/thread.h"
  #include "qemu/coroutine_int.h"
  #include "io/channel.h"
@@ -261,6 +262,8 @@ struct MigrationState {
  
  int state;
  
+JSONWriter *vmdesc;

+
  /* State related to return path */
  struct {
  /* Protected by qemu_file_lock */
diff --git a/migration/savevm.c b/migration/savevm.c
index 48e85c052c2c..174cdbefc29d 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1137,13 +1137,18 @@ void qemu_savevm_non_migratable_list(strList **reasons)
  
  void qemu_savevm_state_header(QEMUFile *f)

  {
+MigrationState *s = migrate_get_current();
  trace_savevm_state_header();
  qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
  qemu_put_be32(f, QEMU_VM_FILE_VERSION);
  
-if (migrate_get_current()->send_configuration) {

+if (s->send_configuration) {
  qemu_put_byte(f, QEMU_VM_CONFIGURATION);
-vmstate_save_state(f, _configuration, _state, 0);
+   json_writer_start_object(s->vmdesc, NULL);
+   json_writer_start_object(s->vmdesc, "configuration");
+vmstate_save_state(f, _configuration, _state, 
s->vmdesc);
+   json_writer_end_object(s->vmdesc);
+


IIUC, this is changing the info that is written in the VM
configuration section, by adding an extra level of nesting
to the object.

Isn't this going to cause backwards compatibility problems ?

Nothing in the patch seems to take account of the extra
'configuration' object that has been started


The resulting json looks like:

{
"configuration": {
"vmsd_name": "configuration",
"version": 1,
"fields": [
{
"name": "len",
"type": "uint32",
"size": 4
},
{
"name": "name",
"type": "buffer",
"size": 13
}
],
"subsections": [
{
"vmsd_name": "configuration/capabilities",
"version": 1,
"fields": [
{
"name": "caps_count",
"type": "uint32",
"size": 4
},
{
"name": "capabilities",
"type": "capability",
"size": 10
}
]
}
]
},
"page_size": 4096,
"devices": [
{
"name": "timer",
"instance_id": 0,
//ommitted

So the "configuration" object is indeed added, but older versions of 
qemu can ignore it without any problem.





Also, there's two  json_writer_start_object calls, but only
one json_writer_end_object.


That's intentional: the first one begins the top-level object and is
actually paired with the final call to
json_writer_end_object(s->vmdesc) in
qemu_savevm_state_complete_precopy_non_iterable.
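
(For illustration, the pairing in isolation; this is only a sketch that uses
the json_writer calls already present in the patch, not the actual savevm code
path:)

#include "qemu/osdep.h"
#include "qapi/qmp/json-writer.h"

static void vmdesc_pairing_sketch(void)
{
    JSONWriter *w = json_writer_new(false);

    json_writer_start_object(w, NULL);            /* top level, opened in
                                                     qemu_savevm_state_header() */
    json_writer_start_object(w, "configuration");
    /* vmstate_save_state() fills in the configuration fields here */
    json_writer_end_object(w);                    /* closes "configuration" */

    /* ... per-device sections are appended while migration runs ... */

    json_writer_end_object(w);                    /* closes the top level, done in
                                                     qemu_savevm_state_complete_precopy_non_iterable() */
    json_writer_free(w);
}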




BTW, some tabs crept into this patch.


Will fix this.

PS. I usually work on the linux kernel so vim used my linuxsty.vim 
settings. However, I eventually installed 

Re: [PATCH v3 1/2] Refactoring: refactor TFR() macro to RETRY_ON_EINTR()

2022-10-19 Thread Christian Schoenebeck
On Dienstag, 18. Oktober 2022 10:43:40 CEST Nikita Ivanov wrote:
> Rename the macro to a more transparent name and refactor
> it into an expression.
> 
> Signed-off-by: Nikita Ivanov 
> ---
>  chardev/char-fd.c  | 2 +-
>  chardev/char-pipe.c| 8 +---
>  include/qemu/osdep.h   | 8 +++-
>  net/tap-bsd.c  | 6 +++---
>  net/tap-linux.c| 2 +-
>  net/tap-solaris.c  | 8 
>  net/tap.c  | 2 +-
>  os-posix.c | 2 +-
>  tests/qtest/libqtest.c | 2 +-
>  9 files changed, 24 insertions(+), 16 deletions(-)
> 
> diff --git a/chardev/char-fd.c b/chardev/char-fd.c
> index cf78454841..d2c4923359 100644
> --- a/chardev/char-fd.c
> +++ b/chardev/char-fd.c
> @@ -198,7 +198,7 @@ int qmp_chardev_open_file_source(char *src, int flags, 
> Error **errp)
>  {
>  int fd = -1;
>  
> -TFR(fd = qemu_open_old(src, flags, 0666));
> +fd = RETRY_ON_EINTR(qemu_open_old(src, flags, 0666));
>  if (fd == -1) {
>  error_setg_file_open(errp, errno, src);
>  }
> diff --git a/chardev/char-pipe.c b/chardev/char-pipe.c
> index 66d3b85091..5ad30bcc59 100644
> --- a/chardev/char-pipe.c
> +++ b/chardev/char-pipe.c
> @@ -131,8 +131,8 @@ static void qemu_chr_open_pipe(Chardev *chr,
>  
>  filename_in = g_strdup_printf("%s.in", filename);
>  filename_out = g_strdup_printf("%s.out", filename);
> -TFR(fd_in = qemu_open_old(filename_in, O_RDWR | O_BINARY));
> -TFR(fd_out = qemu_open_old(filename_out, O_RDWR | O_BINARY));
> +fd_in = RETRY_ON_EINTR(qemu_open_old(filename_in, O_RDWR | O_BINARY));
> +fd_out = RETRY_ON_EINTR(qemu_open_old(filename_out, O_RDWR | O_BINARY));
>  g_free(filename_in);
>  g_free(filename_out);
>  if (fd_in < 0 || fd_out < 0) {
> @@ -142,7 +142,9 @@ static void qemu_chr_open_pipe(Chardev *chr,
>  if (fd_out >= 0) {
>  close(fd_out);
>  }
> -TFR(fd_in = fd_out = qemu_open_old(filename, O_RDWR | O_BINARY));
> +fd_in = fd_out = RETRY_ON_EINTR(
> +qemu_open_old(filename, O_RDWR | O_BINARY)
> +);
>  if (fd_in < 0) {
>  error_setg_file_open(errp, errno, filename);
>  return;
> diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> index b1c161c035..45fcf5f2dc 100644
> --- a/include/qemu/osdep.h
> +++ b/include/qemu/osdep.h
> @@ -243,7 +243,13 @@ void QEMU_ERROR("code path is reachable")
>  #define ESHUTDOWN 4099
>  #endif
>  
> -#define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
> +#define RETRY_ON_EINTR(expr) \
> +(__extension__  \
> +({ typeof(expr) __result;   \
> +   do { \
> +__result = (typeof(expr)) (expr); \

You forgot to drop the redundant type cast here. With that dropped:

Reviewed-by: Christian Schoenebeck 
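
For reference, a sketch of the macro with the redundant cast dropped (derived
directly from the hunk quoted above, so only the assignment line changes):

#define RETRY_ON_EINTR(expr) \
    (__extension__ \
    ({ typeof(expr) __result; \
       do { \
           __result = (expr); /* no (typeof(expr)) cast needed */ \
       } while (__result == -1 && errno == EINTR); \
       __result; }))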

> +   } while (__result == -1 && errno == EINTR); \
> +   __result; }))
>  
>  /* time_t may be either 32 or 64 bits depending on the host OS, and
>   * can be either signed or unsigned, so we can't just hardcode a
> diff --git a/net/tap-bsd.c b/net/tap-bsd.c
> index 005ce05c6e..4c98fdd337 100644
> --- a/net/tap-bsd.c
> +++ b/net/tap-bsd.c
> @@ -56,7 +56,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
>  } else {
>  snprintf(dname, sizeof dname, "/dev/tap%d", i);
>  }
> -TFR(fd = open(dname, O_RDWR));
> +fd = RETRY_ON_EINTR(open(dname, O_RDWR));
>  if (fd >= 0) {
>  break;
>  }
> @@ -111,7 +111,7 @@ static int tap_open_clone(char *ifname, int ifname_size, 
> Error **errp)
>  int fd, s, ret;
>  struct ifreq ifr;
>  
> -TFR(fd = open(PATH_NET_TAP, O_RDWR));
> +fd = RETRY_ON_EINTR(open(PATH_NET_TAP, O_RDWR));
>  if (fd < 0) {
>  error_setg_errno(errp, errno, "could not open %s", PATH_NET_TAP);
>  return -1;
> @@ -159,7 +159,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
>  if (ifname[0] != '\0') {
>  char dname[100];
>  snprintf(dname, sizeof dname, "/dev/%s", ifname);
> -TFR(fd = open(dname, O_RDWR));
> +fd = RETRY_ON_EINTR(open(dname, O_RDWR));
>  if (fd < 0 && errno != ENOENT) {
>  error_setg_errno(errp, errno, "could not open %s", dname);
>  return -1;
> diff --git a/net/tap-linux.c b/net/tap-linux.c
> index 304ff45071..f54f308d35 100644
> --- a/net/tap-linux.c
> +++ b/net/tap-linux.c
> @@ -45,7 +45,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
>  int len = sizeof(struct virtio_net_hdr);
>  unsigned int features;
>  
> -TFR(fd = open(PATH_NET_TUN, O_RDWR));
> +fd = RETRY_ON_EINTR(open(PATH_NET_TUN, O_RDWR));
>  if (fd < 0) {
>  error_setg_errno(errp, errno, "could not open %s", PATH_NET_TUN);
>  return -1;
> diff --git a/net/tap-solaris.c 

Re: [PATCH v5 09/10] acpi/tests/avocado/bits: add a README file to describe the test

2022-10-19 Thread Ani Sinha
On Wed, Oct 19, 2022 at 9:04 PM Alex Bennée  wrote:
>
>
> Ani Sinha  writes:
>
> > Add a README file that describes the purpose of the various test files and 
> > gives
> > guidance to developers on where and how to make changes.
> >
> > Cc: Daniel P. Berrange" 
> > Cc: Paolo Bonzini 
> > Cc: Maydell Peter 
> > Cc: John Snow 
> > Cc: Thomas Huth 
> > Cc: Alex Bennée 
> > Cc: Igor Mammedov 
> > Cc: Michael Tsirkin 
> > Signed-off-by: Ani Sinha 
> > ---
> >  tests/avocado/acpi-bits/README | 133 +
> >  1 file changed, 133 insertions(+)
> >  create mode 100644 tests/avocado/acpi-bits/README
> >
> > diff --git a/tests/avocado/acpi-bits/README b/tests/avocado/acpi-bits/README
> > new file mode 100644
> > index 00..4945dfc1f2
> > --- /dev/null
> > +++ b/tests/avocado/acpi-bits/README
> > @@ -0,0 +1,133 @@
> > +=
> > +ACPI/SMBIOS AVOCADO TESTS USING BIOSBITS
> > +=
> > +
> > +Biosbits is software written by Josh Triplett that can be downloaded
> > +from https://biosbits.org/. The github codebase can be found here:
> > +https://github.com/biosbits/bits/tree/master. It is software that executes
> > +the bios components such as acpi and smbios tables directly through acpica
> > +bios interpreter (a freely available C based library written by Intel,
> > +downloadable from https://acpica.org/ and is included with biosbits) 
> > without an
> > +operating system getting involved in between.
> > +There are several advantages to directly testing the bios in a real 
> > physical
> > +machine or VM as opposed to indirectly discovering bios issues through the
> > +operating system. For one thing, the OSes tend to hide bios problems from 
> > the
> > +end user. The other is that we have more control of what we wanted to test
> > +and how by directly using acpica interpreter on top of the bios on a 
> > running
> > +system. More details on the inspiration for developing biosbits and its 
> > real
> > +life uses can be found in (a) and (b).
> > +This directory contains tests written in python using avocado framework 
> > that
> > +exercises the QEMU bios components using biosbits and reports test 
> > failures.
> > +For QEMU, we maintain a fork of bios bits in gitlab along with all the
> > +dependent submodules:
> > +https://gitlab.com/qemu-project/biosbits-bits
> > +This fork contains numerous fixes, a newer acpica and changes specific to
> > +running these avocado QEMU tests using bits. The author of this document
> > +is the sole maintainer of the QEMU fork of bios bits repo.
> > +
> > +Under the directory tests/avocado/, acpi-bits.py is a QEMU avocado test 
> > that
> > +drives all this.
> > +
> > +A brief description of the various test files follows.
> > +
> > +Under tests/avocado/ as the root we have:
> > +
> > +├── acpi-bits
> > +│ ├── bits-config
> > +│ │ └── bits-cfg.txt
> > +│ ├── bits-tests
> > +│ │ ├── smbios.py2
> > +│ │ ├── smilatency.py2
> > +│ │ ├── testacpi.py2
> > +│ │ └── testcpuid.py2
> > +│ └── README
> > +├── acpi-bits.py
> > +
> > +tests/avocado:
> > + - acpi-bits.py: This is the main python avocado test script that 
> > generates a
> > +   biosbits iso. It then spawns a QEMU VM with it, collects the log and 
> > reports
> > +   test failures. This is the script one would be interested in if they 
> > wanted
> > +   to add or change some component of the log parsing, add a new command 
> > line
> > +   to alter how QEMU is spawned etc. Test writers typically would not need 
> > to
> > +   modify this script unless they wanted to enhance or change the log 
> > parsing
> > +   for their tests. Following environment variables are used in this test:
> > + - V=1 : This enables verbose mode for the test. It dumps the entire 
> > log
> > +   from bios bits and also more details in case failure happens. It is
> > +   useful for debugging the test failures or tests themselves.
> > +
> > +   In order to run this test, please perform the following steps from the 
> > QEMU
> > +   build directory:
> > +
> > +   $ make check-venv (needed only the first time to create the venv)
> > +   $ ./tests/venv/bin/avocado run -t acpi tests/avocado
> > +
> > +   The above will run all acpi avocado tests including this one.
> > +   In order to run the individual tests, perform the following:
> > +
> > +   $ ./tests/venv/bin/avocado run tests/avocado/acpi-bits.py --tap -
> > +
> > +   The above will produce output in tap format. You can omit "--tap -" in 
> > the
> > +   end and it will produce output like the following:
> > +
> > +   $ ./tests/venv/bin/avocado run tests/avocado/acpi-bits.py
> > + Fetching asset from 
> > tests/avocado/acpi-bits.py:AcpiBitsTest.test_acpi_smbios_bits
> > + JOB ID : eab225724da7b64c012c65705dc2fa14ab1defef
> > + JOB LOG: 
> > 

Re: [PATCH v10 1/9] s390x/cpu topology: core_id sets s390x CPU topology

2022-10-19 Thread Pierre Morel




On 10/18/22 18:43, Cédric Le Goater wrote:

Hello Pierre,

On 10/12/22 18:20, Pierre Morel wrote:

In the S390x CPU topology the core_id specifies the CPU address
and the position of the core within the topology.

Let's build the topology based on the core_id.
s390x/cpu topology: core_id sets s390x CPU topology

In the S390x CPU topology the core_id specifies the CPU address
and the position of the cpu within the topology.

Let's build the topology based on the core_id.


The commit log is doubled.


Yes, thanks.





Signed-off-by: Pierre Morel 
---
  include/hw/s390x/cpu-topology.h |  45 +++
  hw/s390x/cpu-topology.c | 132 
  hw/s390x/s390-virtio-ccw.c  |  21 +
  hw/s390x/meson.build    |   1 +
  4 files changed, 199 insertions(+)
  create mode 100644 include/hw/s390x/cpu-topology.h
  create mode 100644 hw/s390x/cpu-topology.c

diff --git a/include/hw/s390x/cpu-topology.h 
b/include/hw/s390x/cpu-topology.h

new file mode 100644
index 00..66c171d0bc
--- /dev/null
+++ b/include/hw/s390x/cpu-topology.h
@@ -0,0 +1,45 @@
+/*
+ * CPU Topology
+ *
+ * Copyright 2022 IBM Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or 
(at

+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#ifndef HW_S390X_CPU_TOPOLOGY_H
+#define HW_S390X_CPU_TOPOLOGY_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+typedef struct S390TopoContainer {
+    int active_count;
+} S390TopoContainer;


This structure does not seem very useful.


+
+#define S390_TOPOLOGY_CPU_IFL 0x03
+#define S390_TOPOLOGY_MAX_ORIGIN ((63 + S390_MAX_CPUS) / 64)
+typedef struct S390TopoTLE { 


The 'Topo' is redundant as TLE stands for 'topology-list entry'. This is 
minor.



+    uint64_t mask[S390_TOPOLOGY_MAX_ORIGIN];
+} S390TopoTLE;
+
+struct S390Topology {
+    SysBusDevice parent_obj;
+    int cpus;
+    S390TopoContainer *socket;
+    S390TopoTLE *tle;
+    MachineState *ms;


hmm, it would be cleaner to introduce the fields and properties needed
by the S390Topology model and avoid dragging the machine object pointer.
AFAICT, these properties would be :

   "nr-cpus"
   "max-cpus"
   "nr-sockets"



OK
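
(For illustration, a minimal sketch of what such qdev properties could look
like; the struct layout, field names and default values below are hypothetical,
not the final patch:)

#include "qemu/osdep.h"
#include "hw/sysbus.h"
#include "hw/qdev-properties.h"

typedef struct S390TopologySketch {
    SysBusDevice parent_obj;
    uint32_t nr_cpus;
    uint32_t max_cpus;
    uint32_t nr_sockets;
} S390TopologySketch;

static Property s390_topology_sketch_properties[] = {
    DEFINE_PROP_UINT32("nr-cpus", S390TopologySketch, nr_cpus, 1),
    DEFINE_PROP_UINT32("max-cpus", S390TopologySketch, max_cpus, 1),
    DEFINE_PROP_UINT32("nr-sockets", S390TopologySketch, nr_sockets, 1),
    DEFINE_PROP_END_OF_LIST(),
};

static void s390_topology_sketch_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(oc);

    /* The machine fills these in at device creation time, so the device
     * no longer needs a back pointer to MachineState. */
    device_class_set_props(dc, s390_topology_sketch_properties);
}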





+};
+
+#define TYPE_S390_CPU_TOPOLOGY "s390-topology"
+OBJECT_DECLARE_SIMPLE_TYPE(S390Topology, S390_CPU_TOPOLOGY)
+
+S390Topology *s390_get_topology(void);
+void s390_topology_new_cpu(int core_id);
+
+static inline bool s390_has_topology(void)
+{
+    return false;
+}
+
+#endif
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
new file mode 100644
index 00..42b22a1831
--- /dev/null
+++ b/hw/s390x/cpu-topology.c
@@ -0,0 +1,132 @@
+/*
+ * CPU Topology
+ *
+ * Copyright IBM Corp. 2022


The Copyright tag is different in the .h file.


OK, I'll change this to match the header file; it seems to be the most
commonly used format.





+ * Author(s): Pierre Morel 
+
+ * This work is licensed under the terms of the GNU GPL, version 2 or 
(at

+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "hw/sysbus.h"
+#include "hw/qdev-properties.h"
+#include "hw/boards.h"
+#include "qemu/typedefs.h"
+#include "target/s390x/cpu.h"
+#include "hw/s390x/s390-virtio-ccw.h"
+#include "hw/s390x/cpu-topology.h"
+
+S390Topology *s390_get_topology(void)
+{
+    static S390Topology *s390Topology;
+
+    if (!s390Topology) {
+    s390Topology = S390_CPU_TOPOLOGY(
+    object_resolve_path(TYPE_S390_CPU_TOPOLOGY, NULL));
+    }
+
+    return s390Topology;


I am not convinced this routine is useful. The s390Topology pointer
could be stored under the machine state I think. It wouldn't be a
problem when CPUs are hot plugged since we have access to the machine
in the hot plug handler.


OK, I'll add a pointer to the machine state that will be initialised during
s390_init_topology().




For the stsi call, 'struct ArchCPU' probably lacks a back pointer to
the machine objects with which CPU interact. These are typically
interrupt controllers or this new s390Topology model. You could add
the pointer there or, better, under a generic 'void *opaque' attribute.

That said, what you did works fine. The modeling could be cleaner.


Yes. I think you are right and I'll add an opaque pointer to the topology.




+}
+
+/*
+ * s390_topology_new_cpu:
+ * @core_id: the core ID is machine wide
+ *
+ * The topology returned by s390_get_topology() gives us the CPU
+ * topology established by the -smp QEMU arguments.
+ * The core-id gives:
+ *  - the Container TLE (Topology List Entry) containing the CPU TLE.
+ *  - in the CPU TLE the origin, or offset of the first bit in the 
core mask

+ *  - the bit in the CPU TLE core mask
+ */
+void s390_topology_new_cpu(int core_id)
+{
+    S390Topology *topo = s390_get_topology();
+    int socket_id;
+    int bit, origin;
+
+    /* In the case no Topology 

Re: [PATCH v5 06/10] acpi/tests/avocado/bits: disable smilatency test since it does not pass everytime

2022-10-19 Thread Alex Bennée


Ani Sinha  writes:

> smilatency test is latency sensitive and does not pass deterministically when
> run in QEMU environment under biosbits. Disable the test suite for now.
>
> Example failure:
>
>  SMI latency test 
> Warning: touching the keyboard can affect the results of this test.
> Starting test. Wait here, I will be back in 15 seconds.
> [assert] SMI latency < 150us to minimize risk of OS timeouts FAIL
>   1us   < t <=  10us; average = 1372ns; count = 10912449
>Times between first few observations:  176us 1646ns 1441ns 1450ns 1462ns
>   10us  < t <= 100us; average = 16us; count = 1187
>Times between first few observations:   15ms 3148us 5856us   49ms   33ms
>   100us < t <=   1ms; average = 259us; count = 8
>Times between first few observations:  111ms 2227ms 1779ms  999ms  219ms
>   0 SMI detected using MSR_SMI_COUNT (MSR 0x34)
>   Summary of impact: observed maximum latency = 298us
> Summary: 0 passed, 1 failed
>
> Cc: Daniel P. Berrangé 
> Cc: Paolo Bonzini 
> Cc: Maydell Peter 
> Cc: John Snow 
> Cc: Thomas Huth 
> Cc: Alex Bennée 
> Cc: Igor Mammedov 
> Cc: Michael Tsirkin 
> Signed-off-by: Ani Sinha 
> ---
>  tests/avocado/acpi-bits/bits-tests/smilatency.py2 | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/tests/avocado/acpi-bits/bits-tests/smilatency.py2 
> b/tests/avocado/acpi-bits/bits-tests/smilatency.py2
> index d616970b31..e907c55cc2 100644
> --- a/tests/avocado/acpi-bits/bits-tests/smilatency.py2
> +++ b/tests/avocado/acpi-bits/bits-tests/smilatency.py2
> @@ -37,8 +37,9 @@ import time
>  import usb
>  
>  def register_tests():
> -testsuite.add_test("SMI latency test", smi_latency);
> -testsuite.add_test("SMI latency test with USB disabled via BIOS 
> handoff", test_with_usb_disabled, runall=False);
> +pass
> +# testsuite.add_test("SMI latency test", smi_latency);
> +# testsuite.add_test("SMI latency test with USB disabled via BIOS
>  handoff", test_with_usb_disabled, runall=False);

again this will break bisection.

>  
>  def smi_latency():
>  MSR_SMI_COUNT = 0x34


-- 
Alex Bennée



Re: [PATCH v8 1/8] mm/memfd: Introduce userspace inaccessible memfd

2022-10-19 Thread Kirill A . Shutemov
On Tue, Oct 18, 2022 at 07:12:10PM +0530, Vishal Annapurve wrote:
> On Tue, Oct 18, 2022 at 3:27 AM Kirill A . Shutemov
>  wrote:
> >
> > On Mon, Oct 17, 2022 at 06:39:06PM +0200, Gupta, Pankaj wrote:
> > > On 10/17/2022 6:19 PM, Kirill A . Shutemov wrote:
> > > > On Mon, Oct 17, 2022 at 03:00:21PM +0200, Vlastimil Babka wrote:
> > > > > On 9/15/22 16:29, Chao Peng wrote:
> > > > > > From: "Kirill A. Shutemov" 
> > > > > >
> > > > > > KVM can use memfd-provided memory for guest memory. For normal 
> > > > > > userspace
> > > > > > accessible memory, KVM userspace (e.g. QEMU) mmaps the memfd into 
> > > > > > its
> > > > > > virtual address space and then tells KVM to use the virtual address 
> > > > > > to
> > > > > > setup the mapping in the secondary page table (e.g. EPT).
> > > > > >
> > > > > > With confidential computing technologies like Intel TDX, the
> > > > > > memfd-provided memory may be encrypted with special key for special
> > > > > > software domain (e.g. KVM guest) and is not expected to be directly
> > > > > > accessed by userspace. Precisely, userspace access to such encrypted
> > > > > > memory may lead to host crash so it should be prevented.
> > > > > >
> > > > > > This patch introduces userspace inaccessible memfd (created with
> > > > > > MFD_INACCESSIBLE). Its memory is inaccessible from userspace through
> > > > > > ordinary MMU access (e.g. read/write/mmap) but can be accessed via
> > > > > > in-kernel interface so KVM can directly interact with core-mm 
> > > > > > without
> > > > > > the need to map the memory into KVM userspace.
> > > > > >
> > > > > > It provides semantics required for KVM guest private(encrypted) 
> > > > > > memory
> > > > > > support that a file descriptor with this flag set is going to be 
> > > > > > used as
> > > > > > the source of guest memory in confidential computing environments 
> > > > > > such
> > > > > > as Intel TDX/AMD SEV.
> > > > > >
> > > > > > KVM userspace is still in charge of the lifecycle of the memfd. It
> > > > > > should pass the opened fd to KVM. KVM uses the kernel APIs newly 
> > > > > > added
> > > > > > in this patch to obtain the physical memory address and then 
> > > > > > populate
> > > > > > the secondary page table entries.
> > > > > >
> > > > > > The userspace inaccessible memfd can be fallocate-ed and 
> > > > > > hole-punched
> > > > > > from userspace. When hole-punching happens, KVM can get notified 
> > > > > > through
> > > > > > inaccessible_notifier it then gets chance to remove any mapped 
> > > > > > entries
> > > > > > of the range in the secondary page tables.
> > > > > >
> > > > > > The userspace inaccessible memfd itself is implemented as a shim 
> > > > > > layer
> > > > > > on top of real memory file systems like tmpfs/hugetlbfs but this 
> > > > > > patch
> > > > > > only implemented tmpfs. The allocated memory is currently marked as
> > > > > > unmovable and unevictable, this is required for current confidential
> > > > > > usage. But in future this might be changed.
> > > > > >
> > > > > > Signed-off-by: Kirill A. Shutemov 
> > > > > > Signed-off-by: Chao Peng 
> > > > > > ---
> > > > >
> > > > > ...
> > > > >
> > > > > > +static long inaccessible_fallocate(struct file *file, int mode,
> > > > > > +  loff_t offset, loff_t len)
> > > > > > +{
> > > > > > +   struct inaccessible_data *data = 
> > > > > > file->f_mapping->private_data;
> > > > > > +   struct file *memfd = data->memfd;
> > > > > > +   int ret;
> > > > > > +
> > > > > > +   if (mode & FALLOC_FL_PUNCH_HOLE) {
> > > > > > +   if (!PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
> > > > > > +   return -EINVAL;
> > > > > > +   }
> > > > > > +
> > > > > > +   ret = memfd->f_op->fallocate(memfd, mode, offset, len);
> > > > > > +   inaccessible_notifier_invalidate(data, offset, offset + 
> > > > > > len);
> > > > >
> > > > > Wonder if invalidate should precede the actual hole punch, otherwise 
> > > > > we open
> > > > > a window where the page tables point to memory no longer valid?
> > > >
> > > > Yes, you are right. Thanks for catching this.
> > >
> > > I also noticed this. But then thought the memory would be anyways zeroed
> > > (hole punched) before this call?
> >
> > Hole punching can free pages, given that offset/len covers full page.
> >
> > --
> >   Kiryl Shutsemau / Kirill A. Shutemov
> 
> I think moving this notifier_invalidate before fallocate may not solve
> the problem completely. Is it possible that between invalidate and
> fallocate, KVM tries to handle the page fault for the guest VM from
> another vcpu and uses the pages to be freed to back gpa ranges? Should
> hole punching here also update mem_attr first to say that KVM should
> consider the corresponding gpa ranges to be no more backed by
> inaccessible memfd?

We rely on external synchronization to prevent this. See code around
mmu_invalidate_retry_hva().
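
For reference, the reordering agreed on earlier in the thread (invalidate
before the hole punch) would look roughly like this; a sketch derived from the
quoted hunk, not the final kernel patch:

static long inaccessible_fallocate(struct file *file, int mode,
                                   loff_t offset, loff_t len)
{
    struct inaccessible_data *data = file->f_mapping->private_data;
    struct file *memfd = data->memfd;

    if (mode & FALLOC_FL_PUNCH_HOLE) {
        if (!PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
            return -EINVAL;
    }

    /*
     * Zap the secondary page tables first, so there is no window where
     * they still point at pages the hole punch is about to free.
     */
    inaccessible_notifier_invalidate(data, offset, offset + len);

    return memfd->f_op->fallocate(memfd, mode, offset, len);
}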

-- 
  Kiryl Shutsemau / Kirill 

Re: [PATCH v5 05/10] acpi/tests/avocado/bits: add SPDX license identifiers for bios bits smilatency tests

2022-10-19 Thread Ani Sinha
On Wed, Oct 19, 2022 at 9:02 PM Alex Bennée  wrote:
>
>
> Ani Sinha  writes:
>
> > Added the SPDX license identifier for smilatency tests.
> > Also added a comment indicating that smilatency test is run from within the
> > biosbits environment/VM and hence is not subject to QEMU build/test
> > environment dependency fulfilments or QEMU maintenance activities.
> >
> > Cc: Daniel P. Berrangé 
> > Cc: Paolo Bonzini 
> > Cc: Maydell Peter 
> > Cc: John Snow 
> > Cc: Thomas Huth 
> > Cc: Alex Bennée 
> > Cc: Igor Mammedov 
> > Cc: Michael Tsirkin 
> > Signed-off-by: Ani Sinha 
> > ---
> >  tests/avocado/acpi-bits/bits-tests/smilatency.py2 | 4 
> >  1 file changed, 4 insertions(+)
>
> Thinking about this again, why not just merge this with the initial code
> dump?

I wanted to keep the original test scripts untouched from bits and
make our changes in separate commits. That way it helps to understand
exactly what we added/modified in the original bits test script.



Re: [PATCH v5 09/10] acpi/tests/avocado/bits: add a README file to describe the test

2022-10-19 Thread Alex Bennée


Ani Sinha  writes:

> Add a README file that describes the purpose of the various test files and 
> gives
> guidance to developers on where and how to make changes.
>
> Cc: Daniel P. Berrange" 
> Cc: Paolo Bonzini 
> Cc: Maydell Peter 
> Cc: John Snow 
> Cc: Thomas Huth 
> Cc: Alex Bennée 
> Cc: Igor Mammedov 
> Cc: Michael Tsirkin 
> Signed-off-by: Ani Sinha 
> ---
>  tests/avocado/acpi-bits/README | 133 +
>  1 file changed, 133 insertions(+)
>  create mode 100644 tests/avocado/acpi-bits/README
>
> diff --git a/tests/avocado/acpi-bits/README b/tests/avocado/acpi-bits/README
> new file mode 100644
> index 00..4945dfc1f2
> --- /dev/null
> +++ b/tests/avocado/acpi-bits/README
> @@ -0,0 +1,133 @@
> +=
> +ACPI/SMBIOS AVOCADO TESTS USING BIOSBITS
> +=
> +
> +Biosbits is a software written by Josh Triplett that can be downloaded
> +from https://biosbits.org/. The github codebase can be found here:
> +https://github.com/biosbits/bits/tree/master. It is a software that executes
> +the bios components such as acpi and smbios tables directly through acpica
> +bios interpreter (a freely available C based library written by Intel,
> +downloadable from https://acpica.org/ and is included with biosbits) without 
> an
> +operating system getting involved in between.
> +There are several advantages to directly testing the bios in a real physical
> +machine or VM as opposed to indirectly discovering bios issues through the
> +operating system. For one thing, the OSes tend to hide bios problems from the
> +end user. The other is that we have more control of what we wanted to test
> +and how by directly using acpica interpreter on top of the bios on a running
> +system. More details on the inspiration for developing biosbits and its real
> +life uses can be found in (a) and (b).
> +This directory contains tests written in python using avocado framework that
> +exercises the QEMU bios components using biosbits and reports test failures.
> +For QEMU, we maintain a fork of bios bits in gitlab along with all the
> +dependent submodules:
> +https://gitlab.com/qemu-project/biosbits-bits
> +This fork contains numerous fixes, a newer acpica and changes specific to
> +running these avocado QEMU tests using bits. The author of this document
> +is the sole maintainer of the QEMU fork of bios bits repo.
> +
> +Under the directory tests/avocado/, acpi-bits.py is a QEMU avocado test that
> +drives all this.
> +
> +A brief description of the various test files follows.
> +
> +Under tests/avocado/ as the root we have:
> +
> +├── acpi-bits
> +│ ├── bits-config
> +│ │ └── bits-cfg.txt
> +│ ├── bits-tests
> +│ │ ├── smbios.py2
> +│ │ ├── smilatency.py2
> +│ │ ├── testacpi.py2
> +│ │ └── testcpuid.py2
> +│ └── README
> +├── acpi-bits.py
> +
> +tests/avocado:
> + - acpi-bits.py: This is the main python avocado test script that generates a
> +   biosbits iso. It then spawns a QEMU VM with it, collects the log and 
> reports
> +   test failures. This is the script one would be interested in if they 
> wanted
> +   to add or change some component of the log parsing, add a new command line
> +   to alter how QEMU is spawned etc. Test writers typically would not need to
> +   modify this script unless they wanted to enhance or change the log parsing
> +   for their tests. Following environment variables are used in this test:
> + - V=1 : This enables verbose mode for the test. It dumps the entire log
> +   from bios bits and also more details in case failure happens. It is
> +   useful for debugging the test failures or tests themselves.
> +
> +   In order to run this test, please perform the following steps from the 
> QEMU
> +   build directory:
> +
> +   $ make check-venv (needed only the first time to create the venv)
> +   $ ./tests/venv/bin/avocado run -t acpi tests/avocado
> +
> +   The above will run all acpi avocado tests including this one.
> +   In order to run the individual tests, perform the following:
> +
> +   $ ./tests/venv/bin/avocado run tests/avocado/acpi-bits.py --tap -
> +
> +   The above will produce output in tap format. You can omit "--tap -" in the
> +   end and it will produce output like the following:
> +
> +   $ ./tests/venv/bin/avocado run tests/avocado/acpi-bits.py
> + Fetching asset from 
> tests/avocado/acpi-bits.py:AcpiBitsTest.test_acpi_smbios_bits
> + JOB ID : eab225724da7b64c012c65705dc2fa14ab1defef
> + JOB LOG: 
> /home/anisinha/avocado/job-results/job-2022-10-10T17.58-eab2257/job.log
> + (1/1) tests/avocado/acpi-bits.py:AcpiBitsTest.test_acpi_smbios_bits: 
> PASS (33.09 s)
> + RESULTS: PASS 1 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 0 
> | CANCEL 0
> + JOB TIME   : 39.22 s
> +
> +   You can inspect the log file for more information about the run or in 
> order
> +   

Re: [PATCH v5 05/10] acpi/tests/avocado/bits: add SPDX license identifiers for bios bits smilatency tests

2022-10-19 Thread Alex Bennée


Ani Sinha  writes:

> Added the SPDX license identifier for smilatency tests.
> Also added a comment indicating that smilatency test is run from within the
> biosbits environment/VM and hence is not subject to QEMU build/test
> environment dependency fulfilments or QEMU maintenance activities.
>
> Cc: Daniel P. Berrangé 
> Cc: Paolo Bonzini 
> Cc: Maydell Peter 
> Cc: John Snow 
> Cc: Thomas Huth 
> Cc: Alex Bennée 
> Cc: Igor Mammedov 
> Cc: Michael Tsirkin 
> Signed-off-by: Ani Sinha 
> ---
>  tests/avocado/acpi-bits/bits-tests/smilatency.py2 | 4 
>  1 file changed, 4 insertions(+)

Thinking about this again, why not just merge this with the initial code
dump?


-- 
Alex Bennée



Re: [PATCH v5 03/10] acpi/tests/avocado/bits: disable acpi PSS tests that are failing in biosbits

2022-10-19 Thread Alex Bennée


Ani Sinha  writes:

> PSS tests in the acpi test suite seem to be failing in biosbits. This is because
> the test is unable to find PSS support in QEMU bios. Let us disable
> them for now so that make check does not fail. We can fix the tests and
> re-enable them later.
>
> Example failure:
>
>  ACPI _PSS (Pstate) table conformance tests 
> [assert] _PSS must exist FAIL
>   \_SB_.CPUS.C000
>   No _PSS exists
> Summary: 1 passed, 1 failed
>  ACPI _PSS (Pstate) runtime tests 
> [assert] _PSS must exist FAIL
>   \_SB_.CPUS.C000
>   No _PSS exists
> Summary: 0 passed, 1 failed
>
> Cc: Daniel P. Berrangé 
> Cc: Paolo Bonzini 
> Cc: Maydell Peter 
> Cc: John Snow 
> Cc: Thomas Huth 
> Cc: Alex Bennée 
> Cc: Igor Mammedov 
> Cc: Michael Tsirkin 
> Signed-off-by: Ani Sinha 
> ---
>  tests/avocado/acpi-bits/bits-tests/testacpi.py2 | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/tests/avocado/acpi-bits/bits-tests/testacpi.py2 
> b/tests/avocado/acpi-bits/bits-tests/testacpi.py2
> index 18dc818d62..f818a9cce6 100644
> --- a/tests/avocado/acpi-bits/bits-tests/testacpi.py2
> +++ b/tests/avocado/acpi-bits/bits-tests/testacpi.py2
> @@ -40,8 +40,8 @@ import time
>  
>  def register_tests():
>  testsuite.add_test("ACPI _MAT (Multiple APIC Table Entry) under 
> Processor objects", test_mat, submenu="ACPI Tests")
> -testsuite.add_test("ACPI _PSS (Pstate) table conformance tests", 
> test_pss, submenu="ACPI Tests")
> -testsuite.add_test("ACPI _PSS (Pstate) runtime tests", test_pstates, 
> submenu="ACPI Tests")
> +#testsuite.add_test("ACPI _PSS (Pstate) table conformance tests", 
> test_pss, submenu="ACPI Tests")
> +#testsuite.add_test("ACPI _PSS (Pstate) runtime tests", test_pstates, 
> submenu="ACPI Tests")
>  testsuite.add_test("ACPI DSDT (Differentiated System Description 
> Table)", test_dsdt, submenu="ACPI Tests")
>  testsuite.add_test("ACPI FACP (Fixed ACPI Description Table)", 
> test_facp, submenu="ACPI Tests")
>  testsuite.add_test("ACPI HPET (High Precision Event Timer Table)", 
> test_hpet, submenu="ACPI Tests")

I think this breaks bisection so should probably be included in the
commit that adds the test, with a comment in the commit message.

-- 
Alex Bennée



Re: [PATCH v5 02/10] acpi/tests/avocado/bits: add SPDX license identifiers for bios bits tests

2022-10-19 Thread Alex Bennée


Ani Sinha  writes:

> Added the SPDX license identifiers for biosbits tests.
> Also added a comment on each of the test scripts to indicate that they run
> from within the biosbits environment and hence are not subject to the regular
> maintenance activities for QEMU and are excluded from the dependency management
> challenges in the host testing environment.
>
> Cc: Daniel P. Berrangé 
> Cc: Paolo Bonzini 
> Cc: Maydell Peter 
> Cc: John Snow 
> Cc: Thomas Huth 
> Cc: Alex Bennée 
> Cc: Igor Mammedov 
> Cc: Michael Tsirkin 
> Signed-off-by: Ani Sinha 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée



Re: [PATCH v3 2/2] error handling: Use RETRY_ON_EINTR() macro where applicable

2022-10-19 Thread Christian Schoenebeck
On Tuesday, October 18, 2022 10:43:41 AM CEST Nikita Ivanov wrote:
> There is a defined RETRY_ON_EINTR() macro in qemu/osdep.h
> which handles the same while loop.
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/415
> 
> Signed-off-by: Nikita Ivanov 
> ---
>  block/file-posix.c| 37 -
>  chardev/char-pty.c|  4 +---
>  hw/9pfs/9p-local.c|  8 ++--
>  net/l2tpv3.c  | 17 +
>  net/socket.c  | 16 +++-
>  net/tap.c |  8 ++--
>  qga/commands-posix.c  |  4 +---
>  semihosting/syscalls.c|  4 +---
>  tests/qtest/libqtest.c| 12 +---
>  tests/vhost-user-bridge.c |  4 +---
>  util/main-loop.c  |  4 +---
>  util/osdep.c  |  4 +---
>  util/vfio-helpers.c   | 12 ++--
>  13 files changed, 49 insertions(+), 85 deletions(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 23acffb9a4..8f7a22e3e4 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1229,9 +1229,7 @@ static int hdev_get_max_segments(int fd, struct stat 
> *st)
>  ret = -errno;
>  goto out;
>  }
> -do {
> -ret = read(sysfd, buf, sizeof(buf) - 1);
> -} while (ret == -1 && errno == EINTR);
> +ret = RETRY_ON_EINTR(read(sysfd, buf, sizeof(buf) - 1));
>  if (ret < 0) {
>  ret = -errno;
>  goto out;
> @@ -1379,9 +1377,9 @@ static int handle_aiocb_ioctl(void *opaque)
>  RawPosixAIOData *aiocb = opaque;
>  int ret;
>  
> -do {
> -ret = ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf);
> -} while (ret == -1 && errno == EINTR);
> +ret = RETRY_ON_EINTR(
> +ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf)
> +);
>  if (ret == -1) {
>  return -errno;
>  }
> @@ -1463,18 +1461,17 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData 
> *aiocb)
>  {
>  ssize_t len;
>  
> -do {
> -if (aiocb->aio_type & QEMU_AIO_WRITE)
> -len = qemu_pwritev(aiocb->aio_fildes,
> -   aiocb->io.iov,
> -   aiocb->io.niov,
> -   aiocb->aio_offset);
> - else
> -len = qemu_preadv(aiocb->aio_fildes,
> -  aiocb->io.iov,
> -  aiocb->io.niov,
> -  aiocb->aio_offset);
> -} while (len == -1 && errno == EINTR);
> +len = RETRY_ON_EINTR(
> +(aiocb->aio_type & QEMU_AIO_WRITE) ?
> +qemu_pwritev(aiocb->aio_fildes,
> +   aiocb->io.iov,
> +   aiocb->io.niov,
> +   aiocb->aio_offset) :
> +qemu_preadv(aiocb->aio_fildes,
> +  aiocb->io.iov,
> +  aiocb->io.niov,
> +  aiocb->aio_offset)
> +);
>  
>  if (len == -1) {
>  return -errno;
> @@ -1899,9 +1896,7 @@ static int allocate_first_block(int fd, size_t max_size)
>  buf = qemu_memalign(max_align, write_size);
>  memset(buf, 0, write_size);
>  
> -do {
> -n = pwrite(fd, buf, write_size, 0);
> -} while (n == -1 && errno == EINTR);
> +n = RETRY_ON_EINTR(pwrite(fd, buf, write_size, 0));
>  
>  ret = (n == -1) ? -errno : 0;
>  
> diff --git a/chardev/char-pty.c b/chardev/char-pty.c
> index 53f25c6bbd..92fd33c854 100644
> --- a/chardev/char-pty.c
> +++ b/chardev/char-pty.c
> @@ -93,9 +93,7 @@ static void pty_chr_update_read_handler(Chardev *chr)
>  pfd.fd = fioc->fd;
>  pfd.events = G_IO_OUT;
>  pfd.revents = 0;
> -do {
> -rc = g_poll(, 1, 0);
> -} while (rc == -1 && errno == EINTR);
> +rc = RETRY_ON_EINTR(g_poll(, 1, 0));
>  assert(rc >= 0);
>  
>  if (pfd.revents & G_IO_HUP) {
> diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
> index d42ce6d8b8..bb3187244f 100644
> --- a/hw/9pfs/9p-local.c
> +++ b/hw/9pfs/9p-local.c
> @@ -470,9 +470,7 @@ static ssize_t local_readlink(FsContext *fs_ctx, V9fsPath 
> *fs_path,
>  if (fd == -1) {
>  return -1;
>  }
> -do {
> -tsize = read(fd, (void *)buf, bufsz);
> -} while (tsize == -1 && errno == EINTR);
> +tsize = RETRY_ON_EINTR(read(fd, (void *)buf, bufsz));
>  close_preserve_errno(fd);
>  } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
> (fs_ctx->export_flags & V9FS_SM_NONE)) {
> @@ -908,9 +906,7 @@ static int local_symlink(FsContext *fs_ctx, const char 
> *oldpath,
>  }
>  /* Write the oldpath (target) to the file. */
>  oldpath_size = strlen(oldpath);
> -do {
> -write_size = write(fd, (void *)oldpath, oldpath_size);
> -} while (write_size == -1 && errno == EINTR);
> +write_size = RETRY_ON_EINTR(write(fd, (void *)oldpath, 
> 
