date:20230703

Re: [PATCH v2 3/5] ppc/pnv: Add P10 quad xscom model

2023-07-03 Thread Cédric Le Goater


On 7/4/23 07:42, Joel Stanley wrote:

Add a PnvQuad class for the P10 powernv machine. No xscoms are
implemented yet, but this allows them to be added.

The size is reduced to prevent the quad region from overlapping with the
core region.

   address-space: xscom-0
 -0003 (prio 0, i/o): xscom-0
   0001-0001000f (prio 0, i/o): xscom-quad.0
   000100108000-000100907fff (prio 0, i/o): xscom-core.3
   00010011-00010090 (prio 0, i/o): xscom-core.2
   00010012-00010091 (prio 0, i/o): xscom-core.1
   00010014-00010093 (prio 0, i/o): xscom-core.0

Signed-off-by: Joel Stanley 


Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
v2: Fix unimp read message
 Wrap lines at 80 col
 Set size
---
  include/hw/ppc/pnv_xscom.h |  2 +-
  hw/ppc/pnv.c   |  2 +-
  hw/ppc/pnv_core.c  | 54 ++
  3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
index cbe848d27ba0..f7da9a1dc617 100644
--- a/include/hw/ppc/pnv_xscom.h
+++ b/include/hw/ppc/pnv_xscom.h
@@ -129,7 +129,7 @@ struct PnvXScomInterfaceClass {
  
  #define PNV10_XSCOM_EQ_BASE(core) \

  ((uint64_t) PNV10_XSCOM_EQ(PNV10_XSCOM_EQ_CHIPLET(core)))
-#define PNV10_XSCOM_EQ_SIZE0x10
+#define PNV10_XSCOM_EQ_SIZE0x2
  
  #define PNV10_XSCOM_EC_BASE(core) \

  ((uint64_t) PNV10_XSCOM_EQ_BASE(core) | PNV10_XSCOM_EC(core & 0x3))
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index c77fdb6747a4..5f25fe985ab2 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1669,7 +1669,7 @@ static void pnv_chip_power10_quad_realize(Pnv10Chip 
*chip10, Error **errp)
  PnvQuad *eq = &chip10->quads[i];
  
  pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4],

-  PNV_QUAD_TYPE_NAME("power9"));
+  PNV_QUAD_TYPE_NAME("power10"));
  
  pnv_xscom_add_subregion(chip, PNV10_XSCOM_EQ_BASE(eq->quad_id),

  &eq->xscom_regs);
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 73d25409c937..e4df435b15e9 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -404,6 +404,47 @@ static const MemoryRegionOps pnv_quad_power9_xscom_ops = {
  .endianness = DEVICE_BIG_ENDIAN,
  };
  
+/*

+ * POWER10 Quads
+ */
+
+static uint64_t pnv_quad_power10_xscom_read(void *opaque, hwaddr addr,
+unsigned int width)
+{
+uint32_t offset = addr >> 3;
+uint64_t val = -1;
+
+switch (offset) {
+default:
+qemu_log_mask(LOG_UNIMP, "%s: reading @0x%08x\n", __func__,
+  offset);
+}
+
+return val;
+}
+
+static void pnv_quad_power10_xscom_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned int width)
+{
+uint32_t offset = addr >> 3;
+
+switch (offset) {
+default:
+qemu_log_mask(LOG_UNIMP, "%s: writing @0x%08x\n", __func__,
+  offset);
+}
+}
+
+static const MemoryRegionOps pnv_quad_power10_xscom_ops = {
+.read = pnv_quad_power10_xscom_read,
+.write = pnv_quad_power10_xscom_write,
+.valid.min_access_size = 8,
+.valid.max_access_size = 8,
+.impl.min_access_size = 8,
+.impl.max_access_size = 8,
+.endianness = DEVICE_BIG_ENDIAN,
+};
+
  static void pnv_quad_realize(DeviceState *dev, Error **errp)
  {
  PnvQuad *eq = PNV_QUAD(dev);
@@ -430,6 +471,14 @@ static void pnv_quad_power9_class_init(ObjectClass *oc, 
void *data)
  pqc->xscom_size = PNV9_XSCOM_EQ_SIZE;
  }
  
+static void pnv_quad_power10_class_init(ObjectClass *oc, void *data)

+{
+PnvQuadClass *pqc = PNV_QUAD_CLASS(oc);
+
+pqc->xscom_ops = &pnv_quad_power10_xscom_ops;
+pqc->xscom_size = PNV10_XSCOM_EQ_SIZE;
+}
+
  static void pnv_quad_class_init(ObjectClass *oc, void *data)
  {
  DeviceClass *dc = DEVICE_CLASS(oc);
@@ -453,6 +502,11 @@ static const TypeInfo pnv_quad_infos[] = {
  .name = PNV_QUAD_TYPE_NAME("power9"),
  .class_init = pnv_quad_power9_class_init,
  },
+{
+.parent = TYPE_PNV_QUAD,
+.name = PNV_QUAD_TYPE_NAME("power10"),
+.class_init = pnv_quad_power10_class_init,
+},
  };
  
  DEFINE_TYPES(pnv_quad_infos);

Re: [PATCH v2 4/5] ppc/pnv: Add P10 core xscom model

2023-07-03 Thread Cédric Le Goater


On 7/4/23 07:42, Joel Stanley wrote:

Like the quad xscoms, add a core model for P10 to allow future
differentiation from P9.

Signed-off-by: Joel Stanley 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/ppc/pnv_core.c | 44 ++--
  1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index e4df435b15e9..1eec28c88c41 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -167,6 +167,47 @@ static const MemoryRegionOps pnv_core_power9_xscom_ops = {
  .endianness = DEVICE_BIG_ENDIAN,
  };
  
+/*

+ * POWER10 core controls
+ */
+
+static uint64_t pnv_core_power10_xscom_read(void *opaque, hwaddr addr,
+   unsigned int width)
+{
+uint32_t offset = addr >> 3;
+uint64_t val = 0;
+
+switch (offset) {
+default:
+qemu_log_mask(LOG_UNIMP, "Warning: reading reg=0x%" HWADDR_PRIx "\n",
+  addr);
+}
+
+return val;
+}
+
+static void pnv_core_power10_xscom_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned int width)
+{
+uint32_t offset = addr >> 3;
+
+switch (offset) {
+default:
+qemu_log_mask(LOG_UNIMP, "Warning: writing to reg=0x%" HWADDR_PRIx 
"\n",
+  addr);
+}
+}
+
+static const MemoryRegionOps pnv_core_power10_xscom_ops = {
+.read = pnv_core_power10_xscom_read,
+.write = pnv_core_power10_xscom_write,
+.valid.min_access_size = 8,
+.valid.max_access_size = 8,
+.impl.min_access_size = 8,
+.impl.max_access_size = 8,
+.endianness = DEVICE_BIG_ENDIAN,
+};
+
  static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp)
  {
  CPUPPCState *env = &cpu->env;
@@ -315,8 +356,7 @@ static void pnv_core_power10_class_init(ObjectClass *oc, 
void *data)
  {
  PnvCoreClass *pcc = PNV_CORE_CLASS(oc);
  
-/* TODO: Use the P9 XSCOMs for now on P10 */

-pcc->xscom_ops = &pnv_core_power9_xscom_ops;
+pcc->xscom_ops = &pnv_core_power10_xscom_ops;
  }
  
  static void pnv_core_class_init(ObjectClass *oc, void *data)

Re: [PATCH 2/2] tpm_crb: mark memory as protected

2023-07-03 Thread Laurent Vivier


Hi,

as the region is already skipped by the test of the memory region alignment, I'm going to 
update my patches by only removing the error_report() as proposed by Peter.


I will replace it with a trace to help with debugging.

Thanks,
Laurent

On 7/4/23 05:07, Jason Wang wrote:

On Thu, Jun 22, 2023 at 9:39 PM Laurent Vivier  wrote:


On 6/22/23 15:12, Peter Maydell wrote:

On Tue, 20 Jun 2023 at 20:51, Laurent Vivier  wrote:


This memory is not correctly aligned and cannot be registered
by vDPA and VFIO.


Isn't this a vDPA/VFIO problem? There's no requirement
for RAM MemoryRegions to be aligned in any way.


It's more about the limitation of the IOMMU which can't do subpage protection.


Code
that doesn't want to work with small or weirdly aligned
regions should skip them if that's the right behaviour
for that particular code IMHO.


We already had this:

 if ((!memory_region_is_ram(section->mr) &&
  !memory_region_is_iommu(section->mr)) ||
 memory_region_is_protected(section->mr) ||
 /* vhost-vDPA doesn't allow MMIO to be mapped  */
 memory_region_is_ram_device(section->mr)) {
 return true;
 }





Marc-André proposed to modify vDPA code to skip the region but Michal disagreed:

https://lists.nongnu.org/archive/html/qemu-devel/2022-11/msg03670.html

No one wants the modification, so the problem cannot be fixed.



Yes, otherwise we end up with explicit check for TPM crb in vhost code...

Thanks


Thanks,
Laurent

Re: [PATCH 1/1] pcie: Add hotplug detect state register to w1cmask

2023-07-03 Thread Michael S. Tsirkin

On Tue, Jul 04, 2023 at 03:20:36AM -0300, Leonardo Brás wrote:
> Hello Peter and Michael, I have a few updates on this:
> 
> On Mon, 2023-07-03 at 02:20 -0300, Leonardo Brás wrote:
> > Hello Peter and Michael, thanks for reviewing!
> > 
> > 
> > On Thu, 2023-06-29 at 16:56 -0400, Peter Xu wrote:
> > > On Thu, Jun 29, 2023 at 04:06:53PM -0400, Michael S. Tsirkin wrote:
> > > > On Thu, Jun 29, 2023 at 04:01:41PM -0400, Peter Xu wrote:
> > > > > On Thu, Jun 29, 2023 at 03:33:06PM -0400, Michael S. Tsirkin wrote:
> > > > > > On Thu, Jun 29, 2023 at 01:01:53PM -0400, Peter Xu wrote:
> > > > > > > Hi, Leo,
> > > > > > > 
> > > > > > > Thanks for figuring this out.  Let me copy a few more potential 
> > > > > > > reviewers
> > > > > > > from commit 17858a1695 ("hw/acpi/ich9: Set ACPI PCI hot-plug as 
> > > > > > > default on
> > > > > > > Q35").
> > > > > > > 
> > > > > > > On Thu, Jun 29, 2023 at 06:05:00AM -0300, Leonardo Bras wrote:
> > > > > > > > When trying to migrate a machine type pc-q35-6.0 or lower, with 
> > > > > > > > this
> > > > > > > > cmdline options:
> > > > > > > > 
> > > > > > > > -device 
> > > > > > > > driver=pcie-root-port,port=18,chassis=19,id=pcie-root-port18,bus=pcie.0,addr=0x12
> > > > > > > >  \
> > > > > > > > -device 
> > > > > > > > driver=nec-usb-xhci,p2=4,p3=4,id=nex-usb-xhci0,bus=pcie-root-port18,addr=0x12.0x1
> > > > > > > > 
> > > > > > > > the following bug happens after all ram pages were sent:
> > > > > > > > 
> > > > > > > > qemu-kvm: get_pci_config_device: Bad config data: i=0x6e read: 
> > > > > > > > 0 device: 40 cmask: ff wmask: 0 w1cmask:19
> > > > > > > > qemu-kvm: Failed to load PCIDevice:config
> > > > > > > > qemu-kvm: Failed to load 
> > > > > > > > pcie-root-port:parent_obj.parent_obj.parent_obj
> > > > > > > > qemu-kvm: error while loading state for instance 0x0 of device 
> > > > > > > > ':00:12.0/pcie-root-port'
> > > > > > > > qemu-kvm: load of migration failed: Invalid argument
> > > > > > > > 
> > > > > > > > This happens on pc-q35-6.0 or lower because of:
> > > > > > > > { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }
> > > > > > > > 
> > > > > > > > In this scenario, hotplug_handler_plug() calls 
> > > > > > > > pcie_cap_slot_plug_cb(),
> > > > > > > > which sets the bus dev->config byte 0x6e with bit 
> > > > > > > > PCI_EXP_SLTSTA_PDS to 
> > > > > > > > signal PCI hotplug for the guest. After a while the guest will 
> > > > > > > > deal with
> > > > > > > > this hotplug and qemu will clear the above bit.
> > > > > > 
> > > > > > Presence Detect State – This bit indicates the presence of an
> > > > > > adapter in the slot, reflected by the logical “OR” of the Physical
> > > > > > Layer in-band presence detect mechanism and, if present, any
> > > > > > out-of-band presence detect mechanism defined for the slot’s
> > > > > > corresponding form factor. Note that the in-band presence
> > > > > > detect mechanism requires that power be applied to an adapter
> > > > > > for its presence to be detected. Consequently, form factors that
> > > > > > require a power controller for hot-plug must implement a
> > > > > > physical pin presence detect mechanism.
> > > > > > RO
> > > > > > Defined encodings are:
> > > > > > 0b Slot Empty
> > > > > > 1b Card Present in slot
> > > > > > This bit must be implemented on all Downstream Ports that
> > > > > > implement slots. For Downstream Ports not connected to slots
> > > > > > (where the Slot Implemented bit of the PCI Express Capabilities
> > > > > > register is 0b), this bit must be hardwired to 1b.
> > 
> > Thank you for providing this doc!
> > I am new to PCI stuff, could you please point me to this doc?
> 
> (I mean, the link to the documentation)

The pci specs are all here: https://pcisig.com/
Red Hat is a member so just register, it's free.

I'd get the 5.0 version of pci express base:
https://members.pcisig.com/wg/PCI-SIG/document/13005

6.0 is out but they did something to make it take years to open,
and it shouldn't matter for this.

> > 
> > > > > > 
> > > > > > 
> > > > > > And this seems to match what QEMU is doing: it clears on unplug
> > > > > > not after guest deals with hotplug.
> > 
> > Oh, that's weird.
> > It should not unplug the device, so IIUC it should not clear the bit.
> > Maybe something weird is happening in the guest, I will take a look.
> 
> Updates on this:
> You are right! For some reason the guest is hot-unplugging the device under 
> some
> conditions, so there is another bug on this for me to look after.
> 
> > 
> > > > > > 
> > > > > > 
> > > > > > > > Then, during migration, get_pci_config_device() will compare the
> > > > > > > > configs of both the freshly created device and the one that is 
> > > > > > > > being
> > > > > > > > received via migration, which will differ due to the 
> > > > > > > > PCI_EXP_SLTSTA_PDS bit
> > > > > > > > and cause the bug to reproduce.
> > > > > > 
> > > > > > So bit is set on source.
> > > > > > But why is the bit cleared on destination? This is

Re: [PATCH v2] hw/ide/piix: properly initialize the BMIBA register

2023-07-03 Thread Paolo Bonzini


On 7/3/23 22:33, Bernhard Beschow wrote:

Paolo, Peter: Should we switch to pci_device_reset() in
pci_xen_ide_unplug()? Or is device_cold_reset() supposed to do
everything?


device_cold_reset() does not reset state that is part of the bus, so I 
think it's consistent that it doesn't call pci_do_device_reset().


I agree that calling pci_device_reset() would be a better match for 
pci_xen_ide_unplug().


Paolo

Re: [PATCH 1/1] pcie: Add hotplug detect state register to w1cmask

2023-07-03 Thread Leonardo Brás

Hello Peter and Michael, I have a few updates on this:

On Mon, 2023-07-03 at 02:20 -0300, Leonardo Brás wrote:
> Hello Peter and Michael, thanks for reviewing!
> 
> 
> On Thu, 2023-06-29 at 16:56 -0400, Peter Xu wrote:
> > On Thu, Jun 29, 2023 at 04:06:53PM -0400, Michael S. Tsirkin wrote:
> > > On Thu, Jun 29, 2023 at 04:01:41PM -0400, Peter Xu wrote:
> > > > On Thu, Jun 29, 2023 at 03:33:06PM -0400, Michael S. Tsirkin wrote:
> > > > > On Thu, Jun 29, 2023 at 01:01:53PM -0400, Peter Xu wrote:
> > > > > > Hi, Leo,
> > > > > > 
> > > > > > Thanks for figuring this out.  Let me copy a few more potential 
> > > > > > reviewers
> > > > > > from commit 17858a1695 ("hw/acpi/ich9: Set ACPI PCI hot-plug as 
> > > > > > default on
> > > > > > Q35").
> > > > > > 
> > > > > > On Thu, Jun 29, 2023 at 06:05:00AM -0300, Leonardo Bras wrote:
> > > > > > > When trying to migrate a machine type pc-q35-6.0 or lower, with 
> > > > > > > this
> > > > > > > cmdline options:
> > > > > > > 
> > > > > > > -device 
> > > > > > > driver=pcie-root-port,port=18,chassis=19,id=pcie-root-port18,bus=pcie.0,addr=0x12
> > > > > > >  \
> > > > > > > -device 
> > > > > > > driver=nec-usb-xhci,p2=4,p3=4,id=nex-usb-xhci0,bus=pcie-root-port18,addr=0x12.0x1
> > > > > > > 
> > > > > > > the following bug happens after all ram pages were sent:
> > > > > > > 
> > > > > > > qemu-kvm: get_pci_config_device: Bad config data: i=0x6e read: 0 
> > > > > > > device: 40 cmask: ff wmask: 0 w1cmask:19
> > > > > > > qemu-kvm: Failed to load PCIDevice:config
> > > > > > > qemu-kvm: Failed to load 
> > > > > > > pcie-root-port:parent_obj.parent_obj.parent_obj
> > > > > > > qemu-kvm: error while loading state for instance 0x0 of device 
> > > > > > > ':00:12.0/pcie-root-port'
> > > > > > > qemu-kvm: load of migration failed: Invalid argument
> > > > > > > 
> > > > > > > This happens on pc-q35-6.0 or lower because of:
> > > > > > > { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }
> > > > > > > 
> > > > > > > In this scenario, hotplug_handler_plug() calls 
> > > > > > > pcie_cap_slot_plug_cb(),
> > > > > > > which sets the bus dev->config byte 0x6e with bit 
> > > > > > > PCI_EXP_SLTSTA_PDS to 
> > > > > > > signal PCI hotplug for the guest. After a while the guest will 
> > > > > > > deal with
> > > > > > > this hotplug and qemu will clear the above bit.
> > > > > 
> > > > > Presence Detect State – This bit indicates the presence of an
> > > > > adapter in the slot, reflected by the logical “OR” of the Physical
> > > > > Layer in-band presence detect mechanism and, if present, any
> > > > > out-of-band presence detect mechanism defined for the slot’s
> > > > > corresponding form factor. Note that the in-band presence
> > > > > detect mechanism requires that power be applied to an adapter
> > > > > for its presence to be detected. Consequently, form factors that
> > > > > require a power controller for hot-plug must implement a
> > > > > physical pin presence detect mechanism.
> > > > > RO
> > > > > Defined encodings are:
> > > > > 0b Slot Empty
> > > > > 1b Card Present in slot
> > > > > This bit must be implemented on all Downstream Ports that
> > > > > implement slots. For Downstream Ports not connected to slots
> > > > > (where the Slot Implemented bit of the PCI Express Capabilities
> > > > > register is 0b), this bit must be hardwired to 1b.
> 
> Thank you for providing this doc!
> I am new to PCI stuff, could you please point me to this doc?

(I mean, the link to the documentation)

> 
> > > > > 
> > > > > 
> > > > > And this seems to match what QEMU is doing: it clears on unplug
> > > > > not after guest deals with hotplug.
> 
> Oh, that's weird.
> It should not unplug the device, so IIUC it should not clear the bit.
> Maybe something weird is happening in the guest, I will take a look.

Updates on this:
You are right! For some reason the guest is hot-unplugging the device under some
conditions, so there is another bug on this for me to look after.

> 
> > > > > 
> > > > > 
> > > > > > > Then, during migration, get_pci_config_device() will compare the
> > > > > > > configs of both the freshly created device and the one that is 
> > > > > > > being
> > > > > > > received via migration, which will differ due to the 
> > > > > > > PCI_EXP_SLTSTA_PDS bit
> > > > > > > and cause the bug to reproduce.
> > > > > 
> > > > > So bit is set on source.
> > > > > But why is the bit cleared on destination? This is the part I don't 
> > > > > get.
> 
> No, bit is set when the device is created by qemu.
> After some time running (boot process completion) the bit is cleared.

The 'after some time' here is about the guest hot-unplugging the device.

> 
> The receiving end of migration will then create the device with the bit set, 
> and
> then wait for migration. After the source device is received, the compare 
> fails
> due to those bits being different.
> 

But anyway, there is some chance the device will be hot-unplugged by the guest
OS for any reason, so we need to

[PATCH v2 2/5] ppc/pnv: Subclass quad xscom callbacks

2023-07-03 Thread Joel Stanley

Make the existing pnv_quad_xscom_read/write be P9 specific, in
preparation for a different P10 callback.

Reviewed-by: Cédric Le Goater 
Signed-off-by: Joel Stanley 
---
v2: Add scom region size to class
---
 include/hw/ppc/pnv_core.h | 13 -
 hw/ppc/pnv.c  | 11 +++
 hw/ppc/pnv_core.c | 40 ++-
 3 files changed, 46 insertions(+), 18 deletions(-)

diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
index 3d75706e95da..77ef00f47a72 100644
--- a/include/hw/ppc/pnv_core.h
+++ b/include/hw/ppc/pnv_core.h
@@ -60,8 +60,19 @@ static inline PnvCPUState *pnv_cpu_state(PowerPCCPU *cpu)
 return (PnvCPUState *)cpu->machine_data;
 }
 
+struct PnvQuadClass {
+DeviceClass parent_class;
+
+const MemoryRegionOps *xscom_ops;
+uint64_t xscom_size;
+};
+
 #define TYPE_PNV_QUAD "powernv-cpu-quad"
-OBJECT_DECLARE_SIMPLE_TYPE(PnvQuad, PNV_QUAD)
+
+#define PNV_QUAD_TYPE_SUFFIX "-" TYPE_PNV_QUAD
+#define PNV_QUAD_TYPE_NAME(cpu_model) cpu_model PNV_QUAD_TYPE_SUFFIX
+
+OBJECT_DECLARE_TYPE(PnvQuad, PnvQuadClass, PNV_QUAD)
 
 struct PnvQuad {
 DeviceState parent_obj;
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index fc083173f346..c77fdb6747a4 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1429,14 +1429,15 @@ static void pnv_chip_power9_instance_init(Object *obj)
 }
 
 static void pnv_chip_quad_realize_one(PnvChip *chip, PnvQuad *eq,
-  PnvCore *pnv_core)
+  PnvCore *pnv_core,
+  const char *type)
 {
 char eq_name[32];
 int core_id = CPU_CORE(pnv_core)->core_id;
 
 snprintf(eq_name, sizeof(eq_name), "eq[%d]", core_id);
 object_initialize_child_with_props(OBJECT(chip), eq_name, eq,
-   sizeof(*eq), TYPE_PNV_QUAD,
+   sizeof(*eq), type,
&error_fatal, NULL);
 
 object_property_set_int(OBJECT(eq), "quad-id", core_id, &error_fatal);
@@ -1454,7 +1455,8 @@ static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error 
**errp)
 for (i = 0; i < chip9->nr_quads; i++) {
 PnvQuad *eq = &chip9->quads[i];
 
-pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4]);
+pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4],
+  PNV_QUAD_TYPE_NAME("power9"));
 
 pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->quad_id),
 &eq->xscom_regs);
@@ -1666,7 +1668,8 @@ static void pnv_chip_power10_quad_realize(Pnv10Chip 
*chip10, Error **errp)
 for (i = 0; i < chip10->nr_quads; i++) {
 PnvQuad *eq = &chip10->quads[i];
 
-pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4]);
+pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4],
+  PNV_QUAD_TYPE_NAME("power9"));
 
 pnv_xscom_add_subregion(chip, PNV10_XSCOM_EQ_BASE(eq->quad_id),
 &eq->xscom_regs);
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 0f451b3b6e1f..73d25409c937 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -407,12 +407,14 @@ static const MemoryRegionOps pnv_quad_power9_xscom_ops = {
 static void pnv_quad_realize(DeviceState *dev, Error **errp)
 {
 PnvQuad *eq = PNV_QUAD(dev);
+PnvQuadClass *pqc = PNV_QUAD_GET_CLASS(eq);
 char name[32];
 
 snprintf(name, sizeof(name), "xscom-quad.%d", eq->quad_id);
 pnv_xscom_region_init(&eq->xscom_regs, OBJECT(dev),
-  &pnv_quad_power9_xscom_ops,
-  eq, name, PNV9_XSCOM_EQ_SIZE);
+  pqc->xscom_ops,
+  eq, name,
+  pqc->xscom_size);
 }
 
 static Property pnv_quad_properties[] = {
@@ -420,6 +422,14 @@ static Property pnv_quad_properties[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
+static void pnv_quad_power9_class_init(ObjectClass *oc, void *data)
+{
+PnvQuadClass *pqc = PNV_QUAD_CLASS(oc);
+
+pqc->xscom_ops = &pnv_quad_power9_xscom_ops;
+pqc->xscom_size = PNV9_XSCOM_EQ_SIZE;
+}
+
 static void pnv_quad_class_init(ObjectClass *oc, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(oc);
@@ -429,16 +439,20 @@ static void pnv_quad_class_init(ObjectClass *oc, void 
*data)
 dc->user_creatable = false;
 }
 
-static const TypeInfo pnv_quad_info = {
-.name  = TYPE_PNV_QUAD,
-.parent= TYPE_DEVICE,
-.instance_size = sizeof(PnvQuad),
-.class_init= pnv_quad_class_init,
+static const TypeInfo pnv_quad_infos[] = {
+{
+.name  = TYPE_PNV_QUAD,
+.parent= TYPE_DEVICE,
+.instance_size = sizeof(PnvQuad),
+.class_size= sizeof(PnvQuadClass),
+.class_init= pnv_quad_class_init,
+.abstract  = true,
+},
+{
+.parent = TYPE_PNV_QUAD,
+.name = PNV_QUAD_

[PATCH v2 1/5] ppc/pnv: quad xscom callbacks are P9 specific

2023-07-03 Thread Joel Stanley

Rename the functions to include P9 in the name in preparation for adding
P10 versions.

Correct the unimp read message while we're changing the function.

Reviewed-by: Cédric Le Goater 
Signed-off-by: Joel Stanley 
---
v2: Fix unimp print, and grammar in the commit message
---
 hw/ppc/pnv_core.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 0bc3ad41c81c..0f451b3b6e1f 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -360,8 +360,8 @@ DEFINE_TYPES(pnv_core_infos)
 
 #define P9X_EX_NCU_SPEC_BAR 0x11010
 
-static uint64_t pnv_quad_xscom_read(void *opaque, hwaddr addr,
-unsigned int width)
+static uint64_t pnv_quad_power9_xscom_read(void *opaque, hwaddr addr,
+   unsigned int width)
 {
 uint32_t offset = addr >> 3;
 uint64_t val = -1;
@@ -372,15 +372,15 @@ static uint64_t pnv_quad_xscom_read(void *opaque, hwaddr 
addr,
 val = 0;
 break;
 default:
-qemu_log_mask(LOG_UNIMP, "%s: writing @0x%08x\n", __func__,
+qemu_log_mask(LOG_UNIMP, "%s: reading @0x%08x\n", __func__,
   offset);
 }
 
 return val;
 }
 
-static void pnv_quad_xscom_write(void *opaque, hwaddr addr, uint64_t val,
- unsigned int width)
+static void pnv_quad_power9_xscom_write(void *opaque, hwaddr addr, uint64_t 
val,
+unsigned int width)
 {
 uint32_t offset = addr >> 3;
 
@@ -394,9 +394,9 @@ static void pnv_quad_xscom_write(void *opaque, hwaddr addr, 
uint64_t val,
 }
 }
 
-static const MemoryRegionOps pnv_quad_xscom_ops = {
-.read = pnv_quad_xscom_read,
-.write = pnv_quad_xscom_write,
+static const MemoryRegionOps pnv_quad_power9_xscom_ops = {
+.read = pnv_quad_power9_xscom_read,
+.write = pnv_quad_power9_xscom_write,
 .valid.min_access_size = 8,
 .valid.max_access_size = 8,
 .impl.min_access_size = 8,
@@ -410,7 +410,8 @@ static void pnv_quad_realize(DeviceState *dev, Error **errp)
 char name[32];
 
 snprintf(name, sizeof(name), "xscom-quad.%d", eq->quad_id);
-pnv_xscom_region_init(&eq->xscom_regs, OBJECT(dev), &pnv_quad_xscom_ops,
+pnv_xscom_region_init(&eq->xscom_regs, OBJECT(dev),
+  &pnv_quad_power9_xscom_ops,
   eq, name, PNV9_XSCOM_EQ_SIZE);
 }
 
-- 
2.40.1

[PATCH v2 0/5] ppc/pnv: Extend "quad" model for p10

2023-07-03 Thread Joel Stanley

The quad model implements the EC xscoms for the p9 machine; the same model
is currently reused for p10, which isn't quite correct. This series adds a
PnvQuad class and subclasses it for P9 and P10.

I mistakenly thought we needed the quad model to implement the core
thread state scom on p10, because the read was coming in to the address
belonging to the quad. In fact the quad region was too large,
overlapping with the core. This is fixed in v2, and the core thread is
back where it should be in the core model. This should address Nick's
feedback on the v1 cover letter.

v2 also adds Cedric's r-b, fixes the s/write/read/ mistakes, and is
checkpatch clean.

v1: https://lore.kernel.org/qemu-devel/20230630035547.80329-1-j...@jms.id.au/

Joel Stanley (5):
  ppc/pnv: quad xscom callbacks are P9 specific
  ppc/pnv: Subclass quad xscom callbacks
  ppc/pnv: Add P10 quad xscom model
  ppc/pnv: Add P10 core xscom model
  ppc/pnv: Return zero for core thread state xscom

 include/hw/ppc/pnv_core.h  |  13 ++-
 include/hw/ppc/pnv_xscom.h |   2 +-
 hw/ppc/pnv.c   |  11 ++-
 hw/ppc/pnv_core.c  | 165 +++--
 4 files changed, 162 insertions(+), 29 deletions(-)

-- 
2.40.1

[PATCH v2 4/5] ppc/pnv: Add P10 core xscom model

2023-07-03 Thread Joel Stanley

Like the quad xscoms, add a core model for P10 to allow future
differentiation from P9.

Signed-off-by: Joel Stanley 
---
 hw/ppc/pnv_core.c | 44 ++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index e4df435b15e9..1eec28c88c41 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -167,6 +167,47 @@ static const MemoryRegionOps pnv_core_power9_xscom_ops = {
 .endianness = DEVICE_BIG_ENDIAN,
 };
 
+/*
+ * POWER10 core controls
+ */
+
+static uint64_t pnv_core_power10_xscom_read(void *opaque, hwaddr addr,
+   unsigned int width)
+{
+uint32_t offset = addr >> 3;
+uint64_t val = 0;
+
+switch (offset) {
+default:
+qemu_log_mask(LOG_UNIMP, "Warning: reading reg=0x%" HWADDR_PRIx "\n",
+  addr);
+}
+
+return val;
+}
+
+static void pnv_core_power10_xscom_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned int width)
+{
+uint32_t offset = addr >> 3;
+
+switch (offset) {
+default:
+qemu_log_mask(LOG_UNIMP, "Warning: writing to reg=0x%" HWADDR_PRIx 
"\n",
+  addr);
+}
+}
+
+static const MemoryRegionOps pnv_core_power10_xscom_ops = {
+.read = pnv_core_power10_xscom_read,
+.write = pnv_core_power10_xscom_write,
+.valid.min_access_size = 8,
+.valid.max_access_size = 8,
+.impl.min_access_size = 8,
+.impl.max_access_size = 8,
+.endianness = DEVICE_BIG_ENDIAN,
+};
+
 static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp)
 {
 CPUPPCState *env = &cpu->env;
@@ -315,8 +356,7 @@ static void pnv_core_power10_class_init(ObjectClass *oc, 
void *data)
 {
 PnvCoreClass *pcc = PNV_CORE_CLASS(oc);
 
-/* TODO: Use the P9 XSCOMs for now on P10 */
-pcc->xscom_ops = &pnv_core_power9_xscom_ops;
+pcc->xscom_ops = &pnv_core_power10_xscom_ops;
 }
 
 static void pnv_core_class_init(ObjectClass *oc, void *data)
-- 
2.40.1

[PATCH v2 5/5] ppc/pnv: Return zero for core thread state xscom

2023-07-03 Thread Joel Stanley

Firmware now warns if booting in LPAR per core mode (PPC bit 62). So that
this warning doesn't trigger, report the core thread state as 0.

Reviewed-by: Cédric Le Goater 
Signed-off-by: Joel Stanley 
---
 hw/ppc/pnv_core.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 1eec28c88c41..b7223bb44597 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -116,6 +116,8 @@ static const MemoryRegionOps pnv_core_power8_xscom_ops = {
 #define PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_HYP 0xf010d
 #define PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_OTR 0xf010a
 
+#define PNV9_XSCOM_EC_CORE_THREAD_STATE0x10ab3
+
 static uint64_t pnv_core_power9_xscom_read(void *opaque, hwaddr addr,
unsigned int width)
 {
@@ -134,6 +136,9 @@ static uint64_t pnv_core_power9_xscom_read(void *opaque, 
hwaddr addr,
 case PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_OTR:
 val = 0x0;
 break;
+case PNV9_XSCOM_EC_CORE_THREAD_STATE:
+val = 0;
+break;
 default:
 qemu_log_mask(LOG_UNIMP, "Warning: reading reg=0x%" HWADDR_PRIx "\n",
   addr);
@@ -171,6 +176,8 @@ static const MemoryRegionOps pnv_core_power9_xscom_ops = {
  * POWER10 core controls
  */
 
+#define PNV10_XSCOM_EC_CORE_THREAD_STATE0x412
+
 static uint64_t pnv_core_power10_xscom_read(void *opaque, hwaddr addr,
unsigned int width)
 {
@@ -178,6 +185,9 @@ static uint64_t pnv_core_power10_xscom_read(void *opaque, 
hwaddr addr,
 uint64_t val = 0;
 
 switch (offset) {
+case PNV10_XSCOM_EC_CORE_THREAD_STATE:
+val = 0;
+break;
 default:
 qemu_log_mask(LOG_UNIMP, "Warning: reading reg=0x%" HWADDR_PRIx "\n",
   addr);
-- 
2.40.1

[PATCH v2 3/5] ppc/pnv: Add P10 quad xscom model

2023-07-03 Thread Joel Stanley

Add a PnvQuad class for the P10 powernv machine. No xscoms are
implemented yet, but this allows them to be added.

The size is reduced to prevent the quad region from overlapping with the
core region.

  address-space: xscom-0
-0003 (prio 0, i/o): xscom-0
  0001-0001000f (prio 0, i/o): xscom-quad.0
  000100108000-000100907fff (prio 0, i/o): xscom-core.3
  00010011-00010090 (prio 0, i/o): xscom-core.2
  00010012-00010091 (prio 0, i/o): xscom-core.1
  00010014-00010093 (prio 0, i/o): xscom-core.0

Signed-off-by: Joel Stanley 
---
v2: Fix unimp read message
Wrap lines at 80 col
Set size
---
 include/hw/ppc/pnv_xscom.h |  2 +-
 hw/ppc/pnv.c   |  2 +-
 hw/ppc/pnv_core.c  | 54 ++
 3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
index cbe848d27ba0..f7da9a1dc617 100644
--- a/include/hw/ppc/pnv_xscom.h
+++ b/include/hw/ppc/pnv_xscom.h
@@ -129,7 +129,7 @@ struct PnvXScomInterfaceClass {
 
 #define PNV10_XSCOM_EQ_BASE(core) \
 ((uint64_t) PNV10_XSCOM_EQ(PNV10_XSCOM_EQ_CHIPLET(core)))
-#define PNV10_XSCOM_EQ_SIZE0x10
+#define PNV10_XSCOM_EQ_SIZE0x2
 
 #define PNV10_XSCOM_EC_BASE(core) \
 ((uint64_t) PNV10_XSCOM_EQ_BASE(core) | PNV10_XSCOM_EC(core & 0x3))
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index c77fdb6747a4..5f25fe985ab2 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1669,7 +1669,7 @@ static void pnv_chip_power10_quad_realize(Pnv10Chip 
*chip10, Error **errp)
 PnvQuad *eq = &chip10->quads[i];
 
 pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4],
-  PNV_QUAD_TYPE_NAME("power9"));
+  PNV_QUAD_TYPE_NAME("power10"));
 
 pnv_xscom_add_subregion(chip, PNV10_XSCOM_EQ_BASE(eq->quad_id),
 &eq->xscom_regs);
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 73d25409c937..e4df435b15e9 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -404,6 +404,47 @@ static const MemoryRegionOps pnv_quad_power9_xscom_ops = {
 .endianness = DEVICE_BIG_ENDIAN,
 };
 
+/*
+ * POWER10 Quads
+ */
+
+static uint64_t pnv_quad_power10_xscom_read(void *opaque, hwaddr addr,
+unsigned int width)
+{
+uint32_t offset = addr >> 3;
+uint64_t val = -1;
+
+switch (offset) {
+default:
+qemu_log_mask(LOG_UNIMP, "%s: reading @0x%08x\n", __func__,
+  offset);
+}
+
+return val;
+}
+
+static void pnv_quad_power10_xscom_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned int width)
+{
+uint32_t offset = addr >> 3;
+
+switch (offset) {
+default:
+qemu_log_mask(LOG_UNIMP, "%s: writing @0x%08x\n", __func__,
+  offset);
+}
+}
+
+static const MemoryRegionOps pnv_quad_power10_xscom_ops = {
+.read = pnv_quad_power10_xscom_read,
+.write = pnv_quad_power10_xscom_write,
+.valid.min_access_size = 8,
+.valid.max_access_size = 8,
+.impl.min_access_size = 8,
+.impl.max_access_size = 8,
+.endianness = DEVICE_BIG_ENDIAN,
+};
+
 static void pnv_quad_realize(DeviceState *dev, Error **errp)
 {
 PnvQuad *eq = PNV_QUAD(dev);
@@ -430,6 +471,14 @@ static void pnv_quad_power9_class_init(ObjectClass *oc, 
void *data)
 pqc->xscom_size = PNV9_XSCOM_EQ_SIZE;
 }
 
+static void pnv_quad_power10_class_init(ObjectClass *oc, void *data)
+{
+PnvQuadClass *pqc = PNV_QUAD_CLASS(oc);
+
+pqc->xscom_ops = &pnv_quad_power10_xscom_ops;
+pqc->xscom_size = PNV10_XSCOM_EQ_SIZE;
+}
+
 static void pnv_quad_class_init(ObjectClass *oc, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(oc);
@@ -453,6 +502,11 @@ static const TypeInfo pnv_quad_infos[] = {
 .name = PNV_QUAD_TYPE_NAME("power9"),
 .class_init = pnv_quad_power9_class_init,
 },
+{
+.parent = TYPE_PNV_QUAD,
+.name = PNV_QUAD_TYPE_NAME("power10"),
+.class_init = pnv_quad_power10_class_init,
+},
 };
 
 DEFINE_TYPES(pnv_quad_infos);
-- 
2.40.1

Re: [PATCH v6 5/5] hw/pci: ensure PCIE devices are plugged into only slot 0 of PCIE port

2023-07-03 Thread Ani Sinha




> On 04-Jul-2023, at 10:31 AM, Akihiko Odaki  wrote:
> 
> On 2023/07/03 15:08, Ani Sinha wrote:
>>> On 02-Jul-2023, at 10:29 AM, Michael S. Tsirkin  wrote:
>>> 
>>> On Sat, Jul 01, 2023 at 04:09:31PM +0900, Akihiko Odaki wrote:
>>>> Yes, I want the slot number restriction to be enforced. If it worries you
>>>> too much for regressions, you may implement it as a warning first and then
>>>> turn it a hard error when the next development phase starts.
>>> 
>>> That's not a bad idea.
>> If we had not enforced the check strongly, the tests that we fixed would not 
>> get noticed.
> 
> Perhaps so, but we don't have much time before feature freeze. I rather want 
> to see the check implemented as warning in 8.1 instead of delaying the 
> initial implementation of the check after 8.1 (though I worry if it's already 
> too late for 8.1.)

The feature hard freeze window starts from 12th of next week. So I am still 
debating whether to keep the hard check or just have a warning. If the hard 
check causes regressions, we can always revert it to a warning later.

Re: [PATCH v6 5/5] hw/pci: ensure PCIE devices are plugged into only slot 0 of PCIE port

2023-07-03 Thread Akihiko Odaki


On 2023/07/03 15:08, Ani Sinha wrote:




On 02-Jul-2023, at 10:29 AM, Michael S. Tsirkin  wrote:

On Sat, Jul 01, 2023 at 04:09:31PM +0900, Akihiko Odaki wrote:

Yes, I want the slot number restriction to be enforced. If it worries you
too much for regressions, you may implement it as a warning first and then
turn it a hard error when the next development phase starts.


That's not a bad idea.


If we had not enforced the check strongly, the tests that we fixed would not 
get noticed.



Perhaps so, but we don't have much time before feature freeze. I rather 
want to see the check implemented as warning in 8.1 instead of delaying 
the initial implementation of the check after 8.1 (though I worry if 
it's already too late for 8.1.)

[PATCH v3 2/3] vdpa: Return -EIO if device ack is VIRTIO_NET_ERR in _load_mq()

2023-07-03 Thread Hawkins Jiawei

According to VirtIO standard, "The class, command and
command-specific-data are set by the driver,
and the device sets the ack byte.
There is little it can do except issue a diagnostic
if ack is not VIRTIO_NET_OK."

Therefore, QEMU should stop sending the queued SVQ commands and
cancel the device startup if the device's ack is not VIRTIO_NET_OK.

Yet the problem is that, vhost_vdpa_net_load_mq() returns 1 based on
`*s->status != VIRTIO_NET_OK` when the device's ack is VIRTIO_NET_ERR.
As a result, net->nc->info->load() also returns 1, this makes
vhost_net_start_one() incorrectly assume the device state is
successfully loaded by vhost_vdpa_net_load() and return 0, instead of
goto `fail` label to cancel the device startup, as vhost_net_start_one()
only cancels the device startup when net->nc->info->load() returns a
negative value.

This patch fixes this problem by returning -EIO when the device's
ack is not VIRTIO_NET_OK.

Fixes: f64c7cda69 ("vdpa: Add vhost_vdpa_net_load_mq")
Signed-off-by: Hawkins Jiawei 
Acked-by: Jason Wang 
Acked-by: Eugenio Pérez 
---
v3:
 - split the fixes suggested by Eugenio
 - return -EIO suggested by Michael

v2: 
https://lore.kernel.org/all/69010e9ebb5e3729aef595ed92840f43e48e53e5.1687875592.git.yin31...@gmail.com/
 - fix the same bug in vhost_vdpa_net_load_offloads()

v1: 
https://lore.kernel.org/all/07a1133d6c989394b342e35d8202257771e76769.1686746406.git.yin31...@gmail.com/

 net/vhost-vdpa.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index ee273c40ca..03d87e85c8 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -671,8 +671,11 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
 if (unlikely(dev_written < 0)) {
 return dev_written;
 }
+if (*s->status != VIRTIO_NET_OK) {
+return -EIO;
+}
 
-return *s->status != VIRTIO_NET_OK;
+return 0;
 }
 
 static int vhost_vdpa_net_load_offloads(VhostVDPAState *s,
-- 
2.25.1

[PATCH v3 1/3] vdpa: Return -EIO if device ack is VIRTIO_NET_ERR in _load_mac()

2023-07-03 Thread Hawkins Jiawei

According to VirtIO standard, "The class, command and
command-specific-data are set by the driver,
and the device sets the ack byte.
There is little it can do except issue a diagnostic
if ack is not VIRTIO_NET_OK."

Therefore, QEMU should stop sending the queued SVQ commands and
cancel the device startup if the device's ack is not VIRTIO_NET_OK.

Yet the problem is that, vhost_vdpa_net_load_mac() returns 1 based on
`*s->status != VIRTIO_NET_OK` when the device's ack is VIRTIO_NET_ERR.
As a result, net->nc->info->load() also returns 1, this makes
vhost_net_start_one() incorrectly assume the device state is
successfully loaded by vhost_vdpa_net_load() and return 0, instead of
goto `fail` label to cancel the device startup, as vhost_net_start_one()
only cancels the device startup when net->nc->info->load() returns a
negative value.

This patch fixes this problem by returning -EIO when the device's
ack is not VIRTIO_NET_OK.

Fixes: f73c0c43ac ("vdpa: extract vhost_vdpa_net_load_mac from 
vhost_vdpa_net_load")
Signed-off-by: Hawkins Jiawei 
Acked-by: Jason Wang 
Acked-by: Eugenio Pérez 
---
v3:
 - split the fixes suggested by Eugenio
 - return -EIO suggested by Michael

v2: 
https://lore.kernel.org/all/69010e9ebb5e3729aef595ed92840f43e48e53e5.1687875592.git.yin31...@gmail.com/
 - fix the same bug in vhost_vdpa_net_load_offloads()

v1: 
https://lore.kernel.org/all/07a1133d6c989394b342e35d8202257771e76769.1686746406.git.yin31...@gmail.com/

 net/vhost-vdpa.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index e19ab063fa..ee273c40ca 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -646,8 +646,9 @@ static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const 
VirtIONet *n)
 if (unlikely(dev_written < 0)) {
 return dev_written;
 }
-
-return *s->status != VIRTIO_NET_OK;
+if (*s->status != VIRTIO_NET_OK) {
+return -EIO;
+}
 }
 
 return 0;
-- 
2.25.1

[PATCH v3 3/3] vdpa: Return -EIO if device ack is VIRTIO_NET_ERR in _load_offloads()

2023-07-03 Thread Hawkins Jiawei

According to VirtIO standard, "The class, command and
command-specific-data are set by the driver,
and the device sets the ack byte.
There is little it can do except issue a diagnostic
if ack is not VIRTIO_NET_OK."

Therefore, QEMU should stop sending the queued SVQ commands and
cancel the device startup if the device's ack is not VIRTIO_NET_OK.

Yet the problem is that, vhost_vdpa_net_load_offloads() returns 1 based on
`*s->status != VIRTIO_NET_OK` when the device's ack is VIRTIO_NET_ERR.
As a result, net->nc->info->load() also returns 1, this makes
vhost_net_start_one() incorrectly assume the device state is
successfully loaded by vhost_vdpa_net_load() and return 0, instead of
goto `fail` label to cancel the device startup, as vhost_net_start_one()
only cancels the device startup when net->nc->info->load() returns a
negative value.

This patch fixes this problem by returning -EIO when the device's
ack is not VIRTIO_NET_OK.

Fixes: 0b58d3686a ("vdpa: Add vhost_vdpa_net_load_offloads()")
Signed-off-by: Hawkins Jiawei 
Acked-by: Jason Wang 
Acked-by: Eugenio Pérez 
---
v3:
 - split the fixes suggested by Eugenio
 - return -EIO suggested by Michael

v2: 
https://lore.kernel.org/all/69010e9ebb5e3729aef595ed92840f43e48e53e5.1687875592.git.yin31...@gmail.com/
 - fix the same bug in vhost_vdpa_net_load_offloads()

v1: 
https://lore.kernel.org/all/07a1133d6c989394b342e35d8202257771e76769.1686746406.git.yin31...@gmail.com/

 net/vhost-vdpa.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 03d87e85c8..36aa2d7f8c 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -712,8 +712,11 @@ static int vhost_vdpa_net_load_offloads(VhostVDPAState *s,
 if (unlikely(dev_written < 0)) {
 return dev_written;
 }
+if (*s->status != VIRTIO_NET_OK) {
+return -EIO;
+}
 
-return *s->status != VIRTIO_NET_OK;
+return 0;
 }
 
 static int vhost_vdpa_net_load(NetClientState *nc)
-- 
2.25.1

[PATCH v3 0/3] vdpa: Return -EIO if device ack is VIRTIO_NET_ERR

2023-07-03 Thread Hawkins Jiawei

According to VirtIO standard, "The class, command and
command-specific-data are set by the driver,
and the device sets the ack byte.
There is little it can do except issue a diagnostic
if ack is not VIRTIO_NET_OK."

Therefore, QEMU should stop sending the queued SVQ commands and
cancel the device startup if the device's ack is not VIRTIO_NET_OK.

Yet the problem is that, vhost_vdpa_net_load_x() returns 1 based on
`*s->status != VIRTIO_NET_OK` when the device's ack is VIRTIO_NET_ERR.
As a result, net->nc->info->load() also returns 1, this makes
vhost_net_start_one() incorrectly assume the device state is
successfully loaded by vhost_vdpa_net_load() and return 0, instead of
goto `fail` label to cancel the device startup, as vhost_net_start_one()
only cancels the device startup when net->nc->info->load() returns a
negative value.

This patchset fixes this problem by returning -EIO when the device's
ack is not VIRTIO_NET_OK.

Changelog
=
v3:
 - split the fixes suggested by Eugenio
 - return -EIO suggested by Michael

v2: 
https://lore.kernel.org/all/69010e9ebb5e3729aef595ed92840f43e48e53e5.1687875592.git.yin31...@gmail.com/
 - fix the same bug in vhost_vdpa_net_load_offloads()

v1: https://lore.kernel.org/all/cover.1686746406.git.yin31...@gmail.com/

Hawkins Jiawei (3):
  vdpa: Return -EIO if device ack is VIRTIO_NET_ERR in _load_mac()
  vdpa: Return -EIO if device ack is VIRTIO_NET_ERR in _load_mq()
  vdpa: Return -EIO if device ack is VIRTIO_NET_ERR in _load_offloads()

 net/vhost-vdpa.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

-- 
2.25.1

Re: [PATCH 2/2] tpm_crb: mark memory as protected

2023-07-03 Thread Jason Wang

On Thu, Jun 22, 2023 at 9:39 PM Laurent Vivier  wrote:
>
> On 6/22/23 15:12, Peter Maydell wrote:
> > On Tue, 20 Jun 2023 at 20:51, Laurent Vivier  wrote:
> >>
> >> This memory is not correctly aligned and cannot be registered
> >> by vDPA and VFIO.
> >
> > Isn't this a vDPA/VFIO problem? There's no requirement
> > for RAM MemoryRegions to be aligned in any way.

It's more about the limitation of the IOMMU which can't do subpage protection.

> > Code
> > that doesn't want to work with small or weirdly aligned
> > regions should skip them if that's the right behaviour
> > for that particular code IMHO.

We had already had this:

if ((!memory_region_is_ram(section->mr) &&
 !memory_region_is_iommu(section->mr)) ||
memory_region_is_protected(section->mr) ||
/* vhost-vDPA doesn't allow MMIO to be mapped  */
memory_region_is_ram_device(section->mr)) {
return true;
}

> >
>
> Marc-André proposed to modify vDPA code to skip the region but Michal 
> disagreed:
>
> https://lists.nongnu.org/archive/html/qemu-devel/2022-11/msg03670.html
>
> No one wants the modification, so the problem cannot be fixed.
>

Yes, otherwise we end up with explicit check for TPM crb in vhost code...

Thanks

> Thanks,
> Laurent
>

[PATCH v2 1/2] target/riscv: Remove redundant check in pmp_is_locked

2023-07-03 Thread Ruibo Lu

The check of the top PMP is redundant and does not influence the return
value, so consider removing it.

Reviewed-by: Weiwei Li 
Reviewed-by: Alistair Francis 
Signed-off-by: Ruibo Lu 
---
 target/riscv/pmp.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c
index 9d8db493e6..1a9279ba88 100644
--- a/target/riscv/pmp.c
+++ b/target/riscv/pmp.c
@@ -49,11 +49,6 @@ static inline int pmp_is_locked(CPURISCVState *env, uint32_t 
pmp_index)
 return 1;
 }
 
-/* Top PMP has no 'next' to check */
-if ((pmp_index + 1u) >= MAX_RISCV_PMPS) {
-return 0;
-}
-
 return 0;
 }
 
-- 
2.41.0

[PATCH v2 2/2] target/riscv: Optimize ambiguous local variable in pmp_hart_has_privs

2023-07-03 Thread Ruibo Lu

These two values represent whether the start/end address is in pmp_range.
However, their type and name are ambiguous. This commit changes their
name and type to improve code readability and accuracy.

Reviewed-by: Weiwei Li 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Ruibo Lu 
---
 target/riscv/pmp.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c
index 1a9279ba88..ea3d29217a 100644
--- a/target/riscv/pmp.c
+++ b/target/riscv/pmp.c
@@ -203,16 +203,16 @@ void pmp_update_rule_nums(CPURISCVState *env)
 }
 }
 
-static int pmp_is_in_range(CPURISCVState *env, int pmp_index,
-   target_ulong addr)
+static bool pmp_is_in_range(CPURISCVState *env, int pmp_index,
+target_ulong addr)
 {
-int result = 0;
+bool result = false;
 
 if ((addr >= env->pmp_state.addr[pmp_index].sa) &&
 (addr <= env->pmp_state.addr[pmp_index].ea)) {
-result = 1;
+result = true;
 } else {
-result = 0;
+result = false;
 }
 
 return result;
@@ -287,8 +287,8 @@ bool pmp_hart_has_privs(CPURISCVState *env, target_ulong 
addr,
 {
 int i = 0;
 int pmp_size = 0;
-target_ulong s = 0;
-target_ulong e = 0;
+bool sa_in = false;
+bool ea_in = false;
 
 /* Short cut if no rules */
 if (0 == pmp_get_num_rules(env)) {
@@ -314,11 +314,11 @@ bool pmp_hart_has_privs(CPURISCVState *env, target_ulong 
addr,
  * from low to high
  */
 for (i = 0; i < MAX_RISCV_PMPS; i++) {
-s = pmp_is_in_range(env, i, addr);
-e = pmp_is_in_range(env, i, addr + pmp_size - 1);
+sa_in = pmp_is_in_range(env, i, addr);
+ea_in = pmp_is_in_range(env, i, addr + pmp_size - 1);
 
 /* partially inside */
-if ((s + e) == 1) {
+if (sa_in ^ ea_in) {
 qemu_log_mask(LOG_GUEST_ERROR,
   "pmp violation - access is partially inside\n");
 *allowed_privs = 0;
@@ -339,7 +339,7 @@ bool pmp_hart_has_privs(CPURISCVState *env, target_ulong 
addr,
 (env->pmp_state.pmp[i].cfg_reg & PMP_WRITE) |
 ((env->pmp_state.pmp[i].cfg_reg & PMP_EXEC) >> 2);
 
-if (((s + e) == 2) && (PMP_AMATCH_OFF != a_field)) {
+if (sa_in && ea_in && (PMP_AMATCH_OFF != a_field)) {
 /*
  * If the PMP entry is not off and the address is in range,
  * do the priv check
-- 
2.41.0

RE: [PATCH v6 5/7] vfio/migration: Free resources when vfio_migration_realize fails

2023-07-03 Thread Duan, Zhenzhong

>-Original Message-
>From: Cédric Le Goater 
>Sent: Monday, July 3, 2023 11:45 PM
>Subject: Re: [PATCH v6 5/7] vfio/migration: Free resources when
>vfio_migration_realize fails
>
>On 7/3/23 09:15, Zhenzhong Duan wrote:
>> When vfio_realize() succeeds, hot unplug will call vfio_exitfn() to
>> free resources allocated in vfio_realize(); when vfio_realize() fails,
>> vfio_exitfn() is never called and we need to free resources in
>> vfio_realize().
>>
>> In the case that vfio_migration_realize() fails,
>> e.g: with -only-migratable & enable-migration=off, we see below:
>>
>> (qemu) device_add
>> vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off
>> 0000:81:11.1: Migration disabled
>> Error: disallowing migration blocker (--only-migratable) for:
>> 0000:81:11.1: Migration is disabled for VFIO device
>>
>> If we hotplug again we should see same log as above, but we see:
>> (qemu) device_add
>> vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off
>> Error: vfio 0000:81:11.1: device is already attached
>>
>> That's because some references to VFIO device isn't released.
>> For resources allocated in vfio_migration_realize(), free them by
>> jumping to out_deinit path with calling a new function
>> vfio_migration_deinit(). For resources allocated in vfio_realize(),
>> free them by jumping to de-register path in vfio_realize().
>>
>> Signed-off-by: Zhenzhong Duan 
>
>The vfio_migration_realize() routine is somewhat difficult to follow but I 
>don't
>see how to improve it. May be could move the viommu test at the beginning ?

Is your purpose to remove vfio_unblock_multiple_devices_migration() from
vfio_migration_deinit()? Or is there another benefit I am missing?

Thanks
Zhenzhong

RE: [PATCH v6 5/7] vfio/migration: Free resources when vfio_migration_realize fails

2023-07-03 Thread Duan, Zhenzhong

>-Original Message-
>From: Duan, Zhenzhong 
>Sent: Monday, July 3, 2023 3:15 PM
>To: qemu-devel@nongnu.org
>Cc: alex.william...@redhat.com; c...@redhat.com; Martins, Joao
>; avih...@nvidia.com; Peng, Chao P
>
>Subject: [PATCH v6 5/7] vfio/migration: Free resources when
>vfio_migration_realize fails
>
>When vfio_realize() succeeds, hot unplug will call vfio_exitfn() to free
>resources allocated in vfio_realize(); when vfio_realize() fails, 
>vfio_exitfn() is
>never called and we need to free resources in vfio_realize().
>
>In the case that vfio_migration_realize() fails,
>e.g: with -only-migratable & enable-migration=off, we see below:
>
>(qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-
>migration=off
>0000:81:11.1: Migration disabled
>Error: disallowing migration blocker (--only-migratable) for: 0000:81:11.1:
>Migration is disabled for VFIO device
>
>If we hotplug again we should see same log as above, but we see:
>(qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-
>migration=off
>Error: vfio 0000:81:11.1: device is already attached
>
>That's because some references to VFIO device isn't released.
>For resources allocated in vfio_migration_realize(), free them by jumping to
>out_deinit path with calling a new function vfio_migration_deinit(). For
>resources allocated in vfio_realize(), free them by jumping to de-register path
>in vfio_realize().
>

Forgot fixes tag:

Fixes: a22651053b59 ("vfio: Make vfio-pci device migration capable")

Thanks
Zhenzhong

>Signed-off-by: Zhenzhong Duan 
>---
> hw/vfio/migration.c | 33 +++--
> hw/vfio/pci.c   |  1 +
> 2 files changed, 24 insertions(+), 10 deletions(-)
>
>diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index
>e6e5e85f7580..e3954570c853 100644
>--- a/hw/vfio/migration.c
>+++ b/hw/vfio/migration.c
>@@ -802,6 +802,17 @@ static int vfio_migration_init(VFIODevice *vbasedev)
> return 0;
> }
>
>+static void vfio_migration_deinit(VFIODevice *vbasedev) {
>+VFIOMigration *migration = vbasedev->migration;
>+
>+remove_migration_state_change_notifier(&migration->migration_state);
>+qemu_del_vm_change_state_handler(migration->vm_state);
>+unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
>+vfio_migration_free(vbasedev);
>+vfio_unblock_multiple_devices_migration();
>+}
>+
> static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error
>**errp)  {
> int ret;
>@@ -866,7 +877,7 @@ int vfio_migration_realize(VFIODevice *vbasedev,
>Error **errp)
> error_setg(&err,
>"%s: VFIO device doesn't support device dirty 
> tracking",
>vbasedev->name);
>-return vfio_block_migration(vbasedev, err, errp);
>+goto add_blocker;
> }
>
> warn_report("%s: VFIO device doesn't support device dirty tracking",
>@@ -875,29 +886,31 @@ int vfio_migration_realize(VFIODevice *vbasedev,
>Error **errp)
>
> ret = vfio_block_multiple_devices_migration(vbasedev, errp);
> if (ret) {
>-return ret;
>+goto out_deinit;
> }
>
> if (vfio_viommu_preset(vbasedev)) {
> error_setg(&err, "%s: Migration is currently not supported "
>"with vIOMMU enabled", vbasedev->name);
>-return vfio_block_migration(vbasedev, err, errp);
>+goto add_blocker;
> }
>
> trace_vfio_migration_realize(vbasedev->name);
> return 0;
>+
>+add_blocker:
>+ret = vfio_block_migration(vbasedev, err, errp);
>+out_deinit:
>+if (ret) {
>+vfio_migration_deinit(vbasedev);
>+}
>+return ret;
> }
>
> void vfio_migration_exit(VFIODevice *vbasedev)  {
> if (vbasedev->migration) {
>-VFIOMigration *migration = vbasedev->migration;
>-
>-remove_migration_state_change_notifier(&migration->migration_state);
>-qemu_del_vm_change_state_handler(migration->vm_state);
>-unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
>-vfio_migration_free(vbasedev);
>-vfio_unblock_multiple_devices_migration();
>+vfio_migration_deinit(vbasedev);
> }
>
> if (vbasedev->migration_blocker) {
>diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index c2cf7454ece6..9154dd929d07
>100644
>--- a/hw/vfio/pci.c
>+++ b/hw/vfio/pci.c
>@@ -3210,6 +3210,7 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
> ret = vfio_migration_realize(vbasedev, errp);
> if (ret) {
> error_report("%s: Migration disabled", vbasedev->name);
>+goto out_deregister;
> }
> }
>
>--
>2.34.1

Re: [PATCH RFC 1/1] vdpa: Return -EINVAL if device's ack is VIRTIO_NET_ERR

2023-07-03 Thread Hawkins Jiawei

On 2023/7/4 0:52, Michael S. Tsirkin wrote:
> On Wed, Jun 14, 2023 at 09:01:47PM +0800, Hawkins Jiawei wrote:
>> According to VirtIO standard, "The class, command and
>> command-specific-data are set by the driver,
>> and the device sets the ack byte.
>> There is little it can do except issue a diagnostic
>> if ack is not VIRTIO_NET_OK."
>>
>> Therefore, QEMU should stop sending the queued SVQ commands and
>> cancel the device startup if the device's ack is not VIRTIO_NET_OK.
>>
>> Yet the problem is that, vhost_vdpa_net_load_x() returns 1 based on
>> `*s->status != VIRTIO_NET_OK` when the device's ack is VIRTIO_NET_ERR.
>> As a result, net->nc->info->load() also returns 1, this makes
>> vhost_net_start_one() incorrectly assume the device state is
>> successfully loaded by vhost_vdpa_net_load() and return 0, instead of
>> goto `fail` label to cancel the device startup, as vhost_net_start_one()
>> only cancels the device startup when net->nc->info->load() returns a
>> negative value.
>>
>> This patch fixes this problem by returning -EINVAL when the device's
>> ack is not VIRTIO_NET_OK.
>>
>> Fixes: f73c0c43ac ("vdpa: extract vhost_vdpa_net_load_mac from 
>> vhost_vdpa_net_load")
>> Fixes: f64c7cda69 ("vdpa: Add vhost_vdpa_net_load_mq")
>> Signed-off-by: Hawkins Jiawei 
>> ---
>>   net/vhost-vdpa.c | 10 +++---
>>   1 file changed, 7 insertions(+), 3 deletions(-)
>>
>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>> index 37cdc84562..630c9bf71e 100644
>> --- a/net/vhost-vdpa.c
>> +++ b/net/vhost-vdpa.c
>> @@ -651,8 +651,9 @@ static int vhost_vdpa_net_load_mac(VhostVDPAState *s, 
>> const VirtIONet *n)
>>   if (unlikely(dev_written < 0)) {
>>   return dev_written;
>>   }
>> -
>> -return *s->status != VIRTIO_NET_OK;
>> +if (*s->status != VIRTIO_NET_OK) {
>> +return -EINVAL;
>> +}
>>   }
>>
>>   return 0;
>> @@ -676,8 +677,11 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
>>   if (unlikely(dev_written < 0)) {
>>   return dev_written;
>>   }
>> +if (*s->status != VIRTIO_NET_OK) {
>> +return -EINVAL;
>> +}
>>
>> -return *s->status != VIRTIO_NET_OK;
>> +return 0;
>>   }
>
> I think EIO would be better, we have too many EINVAL cases,
> making things hard to debug.

I will refactor this patch to return -EIO according to your suggestion.

Thanks!

>
>
>>

>>   static int vhost_vdpa_net_load(NetClientState *nc)
>> --
>> 2.25.1
>

Re: [PATCH RFC 1/1] vdpa: Return -EINVAL if device's ack is VIRTIO_NET_ERR

2023-07-03 Thread Hawkins Jiawei

On 2023/7/4 0:38, Eugenio Perez Martin wrote:
> On Wed, Jun 14, 2023 at 3:02 PM Hawkins Jiawei  wrote:
>>
>> According to VirtIO standard, "The class, command and
>> command-specific-data are set by the driver,
>> and the device sets the ack byte.
>> There is little it can do except issue a diagnostic
>> if ack is not VIRTIO_NET_OK."
>>
>> Therefore, QEMU should stop sending the queued SVQ commands and
>> cancel the device startup if the device's ack is not VIRTIO_NET_OK.
>>
>> Yet the problem is that, vhost_vdpa_net_load_x() returns 1 based on
>> `*s->status != VIRTIO_NET_OK` when the device's ack is VIRTIO_NET_ERR.
>> As a result, net->nc->info->load() also returns 1, this makes
>> vhost_net_start_one() incorrectly assume the device state is
>> successfully loaded by vhost_vdpa_net_load() and return 0, instead of
>> goto `fail` label to cancel the device startup, as vhost_net_start_one()
>> only cancels the device startup when net->nc->info->load() returns a
>> negative value.
>>
>> This patch fixes this problem by returning -EINVAL when the device's
>> ack is not VIRTIO_NET_OK.
>>
>> Fixes: f73c0c43ac ("vdpa: extract vhost_vdpa_net_load_mac from 
>> vhost_vdpa_net_load")
>> Fixes: f64c7cda69 ("vdpa: Add vhost_vdpa_net_load_mq")
>> Signed-off-by: Hawkins Jiawei 
>
> Maybe we could split the fixes? Either way:

OK, I will split these fixes according to your suggestion.

Thanks!


>
> Acked-by: Eugenio Pérez 
>
> Thanks!
>
>> ---
>>   net/vhost-vdpa.c | 10 +++---
>>   1 file changed, 7 insertions(+), 3 deletions(-)
>>
>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>> index 37cdc84562..630c9bf71e 100644
>> --- a/net/vhost-vdpa.c
>> +++ b/net/vhost-vdpa.c
>> @@ -651,8 +651,9 @@ static int vhost_vdpa_net_load_mac(VhostVDPAState *s, 
>> const VirtIONet *n)
>>   if (unlikely(dev_written < 0)) {
>>   return dev_written;
>>   }
>> -
>> -return *s->status != VIRTIO_NET_OK;
>> +if (*s->status != VIRTIO_NET_OK) {
>> +return -EINVAL;
>> +}
>>   }
>>
>>   return 0;
>> @@ -676,8 +677,11 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
>>   if (unlikely(dev_written < 0)) {
>>   return dev_written;
>>   }
>> +if (*s->status != VIRTIO_NET_OK) {
>> +return -EINVAL;
>> +}
>>
>> -return *s->status != VIRTIO_NET_OK;
>> +return 0;
>>   }
>>
>>   static int vhost_vdpa_net_load(NetClientState *nc)
>> --
>> 2.25.1
>>
>

Re: [PATCH RESEND v2 2/2] target/i386/kvm: get and put AMD pmu registers

2023-07-03 Thread Dongli Zhang

Hi Like,

On 7/2/23 07:15, Like Xu wrote:
> On Wed, Jun 21, 2023 at 9:39 AM Dongli Zhang  wrote:
>>
>> The QEMU side calls kvm_get_msrs() to save the pmu registers from the KVM
>> side to QEMU, and calls kvm_put_msrs() to store the pmu registers back to
>> the KVM side.
>>
>> However, only the Intel gp/fixed/global pmu registers are involved. There
>> is not any implementation for AMD pmu registers. The
>> 'has_architectural_pmu_version' and 'num_architectural_pmu_gp_counters' are
>> calculated at kvm_arch_init_vcpu() via cpuid(0xa). This does not work for
>> AMD. Before AMD PerfMonV2, the number of gp registers is decided based on
>> the CPU version.
> 
> Updating the relevant documentation to clarify this part of the deficiency
> would be a good first step.

Would you mind suggesting the doc to add this TODO/deficiency?

The only place I find is to add a new TODO under docs/system/i386, but not sure
if it is worth it. This bugfix is not complex.

> 
>>
>> This patch is to add the support for AMD version=1 pmu, to get and put AMD
>> pmu registers. Otherwise, there will be a bug:
> 
> AMD version=1 ?
> 
> AMD does not have version 1, just directly has 2, perhaps because of x86
> compatibility. AMD also does not have the so-called architectural pmu.
> Maybe need to rename has_architectural_pmu_version for AMD.
> 

Thank you very much for the explanation. I will use version 2.

> It might be more helpful to add similar support for AMD PerfMonV2.

Yes. I will do that. During that time, the AMD PerfMonV2 KVM patchset (from you)
was still in progress.

I see the pull request from Paolo today. I will add that in v3.

> 
>>
>> 1. The VM resets (e.g., via QEMU system_reset or VM kdump/kexec) while it
>> is running "perf top". The pmu registers are not disabled gracefully.
>>
>> 2. Although the x86_cpu_reset() resets many registers to zero, the
>> kvm_put_msrs() does not put AMD pmu registers to KVM side. As a result,
>> some pmu events are still enabled at the KVM side.
> 
> I agree that we should have done that, especially if guest pmu is enabled
> on the AMD platforms.
> 
>>
>> 3. The KVM pmc_speculative_in_use() always returns true so that the events
>> will not be reclaimed. The kvm_pmc->perf_event is still active.
>>
>> 4. After the reboot, the VM kernel reports below error:
>>
>> [0.092011] Performance Events: Fam17h+ core perfctr, Broken BIOS 
>> detected, complain to your hardware vendor.
>> [0.092023] [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 
>> c0010200 is 530076)
>>
>> 5. In a worse case, the active kvm_pmc->perf_event is still able to
>> inject unknown NMIs randomly to the VM kernel.
>>
>> [...] Uhhuh. NMI received for unknown reason 30 on CPU 0.
>>
>> The patch is to fix the issue by resetting AMD pmu registers during the
>> reset.
> 
> I'm not sure if the qemu_reset or VM kexec will necessarily trigger
> kvm::amd_pmu_reset().

According to the mainline linux kernel:

kvm_vcpu_reset()
-> kvm_pmu_reset()
   -> amd_pmu_reset()

The PMU will not reset when init_event==true, that is, when processing the INIT:
line 12049.

11975 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
11976 {
... ...
12049 if (!init_event) {
12050 kvm_pmu_reset(vcpu);
12051 vcpu->arch.smbase = 0x30000;
12052
12053 vcpu->arch.msr_misc_features_enables = 0;
12054 vcpu->arch.ia32_misc_enable_msr =
MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
12055
MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
12056
12057 __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
12058 __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
12059 }

According to the below ...

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d28bc9dd25ce023270d2e039e7c98d38ecbf7758

... "INIT does not initialize the FPU (including MMX, XMM, YMM, etc.), TSC, PMU,
MSRs (in general), MTRRs machine-check, APIC ID, APIC arbitration ID and BSP."

That's why initially I did not send a KVM patch to remove the 'init_event'.

> 
>>
>> Cc: Joe Jin 
>> Cc: Like Xu 
>> Signed-off-by: Dongli Zhang 
>> ---
>>  target/i386/cpu.h |  5 +++
>>  target/i386/kvm/kvm.c | 83 +--
>>  2 files changed, 86 insertions(+), 2 deletions(-)
>>
>> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
>> index cd047e0410..b8ba72e87a 100644
>> --- a/target/i386/cpu.h
>> +++ b/target/i386/cpu.h
>> @@ -471,6 +471,11 @@ typedef enum X86Seg {
>>  #define MSR_CORE_PERF_GLOBAL_CTRL   0x38f
>>  #define MSR_CORE_PERF_GLOBAL_OVF_CTRL   0x390
>>
>> +#define MSR_K7_EVNTSEL0 0xc0010000
>> +#define MSR_K7_PERFCTR0 0xc0010004
>> +#define MSR_F15H_PERF_CTL0  0xc0010200
>> +#define MSR_F15H_PERF_CTR0  0xc0010201
>> +
>>  #define MSR_MC0_CTL 0x400
>>  #define MSR_MC0_STATUS  0x401
>>  #define MSR_MC0_ADDR0x402
>> diff --git a/target/i38

Re: [PATCH] riscv: Generate devicetree only after machine initialization is complete

2023-07-03 Thread Daniel Henrique Barboza





On 7/3/23 18:18, Guenter Roeck wrote:

On 7/3/23 12:25, Daniel Henrique Barboza wrote:

On 7/3/23 00:46, Guenter Roeck wrote:

If the devicetree is created before machine initialization is complete,
it misses dynamic devices. Specifically, the tpm device is not added
to the devicetree file and is therefore not instantiated in Linux.
Create devicetree in virt_machine_done() to solve the problem.

Cc: Alistair Francis 
Fixes: 325b7c4e75 hw/riscv: Enable TPM backends
Signed-off-by: Guenter Roeck 
---
  hw/riscv/virt.c | 9 ++---
  1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index ed4c27487e..08876284f5 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1248,6 +1248,11 @@ static void virt_machine_done(Notifier *notifier, void 
*data)
  uint64_t kernel_entry = 0;
  BlockBackend *pflash_blk0;
+    /* create devicetree if not provided */
+    if (!machine->dtb) {
+    create_fdt(s, memmap);
+    }
+


I suggest moving the entire load/create DT code from virt_machine_init() to
the start of virt_machine_done():

 /* load/create device tree */
 if (machine->dtb) {
 machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
 if (!machine->fdt) {
 error_report("load_device_tree() failed");
 exit(1);
 }
 } else {
 create_fdt(s, memmap);
 }

This way we don't have to look into 2 different functions to wonder what
happens in case machine->dtb is NULL.



I can do that, but I don't know how to test it. Is there a working dtb/machine
combination for riscv which would let me test loading a devicetree file?


I recommend using your own setup with TPM (I'm assuming you're using a TPM
setup): generate a .dtb from it, and then launch it using '-dtb'.

First you need the patch applied (otherwise there won't be a TPM in the FDT).
After that, relaunch the same machine, but append at the end of the
command line:

-machine dumpdtb=file.dtb

This will create a 'file.dtb' file in the working dir and exit. After that,
re-launch the machine again, but now append:

-dtb file.dtb

And you should be able to boot a 'virt' machine with TPM support.


Thanks,

Daniel





Guenter

[PATCH 13/13] ppc440_pcix: Stop using system io region for PCI bus

2023-07-03 Thread BALATON Zoltan

Use the iomem region for the PCI io space and map it directly from the
board without an intermediate alias that is not really needed.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc440_pcix.c | 8 +---
 hw/ppc/sam460ex.c| 6 +-
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/hw/ppc/ppc440_pcix.c b/hw/ppc/ppc440_pcix.c
index ee2dc44f67..cca8a72c72 100644
--- a/hw/ppc/ppc440_pcix.c
+++ b/hw/ppc/ppc440_pcix.c
@@ -490,10 +490,11 @@ static void ppc440_pcix_realize(DeviceState *dev, Error 
**errp)
 s = PPC440_PCIX_HOST(dev);
 
 sysbus_init_irq(sbd, &s->irq);
-memory_region_init(&s->busmem, OBJECT(dev), "pci bus memory", UINT64_MAX);
+memory_region_init(&s->busmem, OBJECT(dev), "pci-mem", UINT64_MAX);
+memory_region_init(&s->iomem, OBJECT(dev), "pci-io", 0x1);
 h->bus = pci_register_root_bus(dev, NULL, ppc440_pcix_set_irq,
- ppc440_pcix_map_irq, &s->irq, &s->busmem,
- get_system_io(), PCI_DEVFN(0, 0), 1, TYPE_PCI_BUS);
+ ppc440_pcix_map_irq, &s->irq, &s->busmem, &s->iomem,
+ PCI_DEVFN(0, 0), 1, TYPE_PCI_BUS);
 
 s->dev = pci_create_simple(h->bus, PCI_DEVFN(0, 0),
TYPE_PPC4xx_HOST_BRIDGE);
@@ -514,6 +515,7 @@ static void ppc440_pcix_realize(DeviceState *dev, Error 
**errp)
 memory_region_add_subregion(&s->container, PCIC0_CFGDATA, &h->data_mem);
 memory_region_add_subregion(&s->container, PPC440_REG_BASE, regs);
 sysbus_init_mmio(sbd, &s->container);
+sysbus_init_mmio(sbd, &s->iomem);
 }
 
 static void ppc440_pcix_class_init(ObjectClass *klass, void *data)
diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c
index 8d0e551d14..1e615b8d35 100644
--- a/hw/ppc/sam460ex.c
+++ b/hw/ppc/sam460ex.c
@@ -269,7 +269,6 @@ static void main_cpu_reset(void *opaque)
 
 static void sam460ex_init(MachineState *machine)
 {
-MemoryRegion *isa = g_new(MemoryRegion, 1);
 MemoryRegion *l2cache_ram = g_new(MemoryRegion, 1);
 DeviceState *uic[4];
 int i;
@@ -441,12 +440,9 @@ static void sam460ex_init(MachineState *machine)
 /* All PCI irqs are connected to the same UIC pin (cf. UBoot source) */
 dev = sysbus_create_simple(TYPE_PPC440_PCIX_HOST, 0xc0ec0,
qdev_get_gpio_in(uic[1], 0));
+sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, 0xc0800);
 pci_bus = PCI_BUS(qdev_get_child_bus(dev, "pci.0"));
 
-memory_region_init_alias(isa, NULL, "isa_mmio", get_system_io(),
- 0, 0x1);
-memory_region_add_subregion(get_system_memory(), 0xc0800, isa);
-
 /* PCI devices */
 pci_create_simple(pci_bus, PCI_DEVFN(6, 0), "sm501");
 /* SoC has a single SATA port but we don't emulate that yet
-- 
2.30.9

[PATCH 01/13] ppc440: Change ppc460ex_pcie_init() parameter type

2023-07-03 Thread BALATON Zoltan

Change parameter of ppc460ex_pcie_init() from env to cpu to allow
further refactoring.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc440.h| 2 +-
 hw/ppc/ppc440_uc.c | 7 ---
 hw/ppc/sam460ex.c  | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/hw/ppc/ppc440.h b/hw/ppc/ppc440.h
index 7c24db8504..ae42bcf0c8 100644
--- a/hw/ppc/ppc440.h
+++ b/hw/ppc/ppc440.h
@@ -18,6 +18,6 @@ void ppc4xx_cpr_init(CPUPPCState *env);
 void ppc4xx_sdr_init(CPUPPCState *env);
 void ppc4xx_ahb_init(CPUPPCState *env);
 void ppc4xx_dma_init(CPUPPCState *env, int dcr_base);
-void ppc460ex_pcie_init(CPUPPCState *env);
+void ppc460ex_pcie_init(PowerPCCPU *cpu);
 
 #endif /* PPC440_H */
diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c
index 651263926e..8eb985d714 100644
--- a/hw/ppc/ppc440_uc.c
+++ b/hw/ppc/ppc440_uc.c
@@ -17,6 +17,7 @@
 #include "hw/qdev-properties.h"
 #include "hw/pci/pci.h"
 #include "sysemu/reset.h"
+#include "cpu.h"
 #include "ppc440.h"
 
 /*/
@@ -1108,17 +1109,17 @@ static void 
ppc460ex_pcie_register_dcrs(PPC460EXPCIEState *s, CPUPPCState *env)
  &dcr_read_pcie, &dcr_write_pcie);
 }
 
-void ppc460ex_pcie_init(CPUPPCState *env)
+void ppc460ex_pcie_init(PowerPCCPU *cpu)
 {
 DeviceState *dev;
 
 dev = qdev_new(TYPE_PPC460EX_PCIE_HOST);
 qdev_prop_set_int32(dev, "dcrn-base", DCRN_PCIE0_BASE);
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
-ppc460ex_pcie_register_dcrs(PPC460EX_PCIE_HOST(dev), env);
+ppc460ex_pcie_register_dcrs(PPC460EX_PCIE_HOST(dev), &cpu->env);
 
 dev = qdev_new(TYPE_PPC460EX_PCIE_HOST);
 qdev_prop_set_int32(dev, "dcrn-base", DCRN_PCIE1_BASE);
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
-ppc460ex_pcie_register_dcrs(PPC460EX_PCIE_HOST(dev), env);
+ppc460ex_pcie_register_dcrs(PPC460EX_PCIE_HOST(dev), &cpu->env);
 }
diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c
index cf065aae0e..aaa8d2f4a5 100644
--- a/hw/ppc/sam460ex.c
+++ b/hw/ppc/sam460ex.c
@@ -422,7 +422,7 @@ static void sam460ex_init(MachineState *machine)
 usb_create_simple(usb_bus_find(-1), "usb-mouse");
 
 /* PCI bus */
-ppc460ex_pcie_init(env);
+ppc460ex_pcie_init(cpu);
 /* All PCI irqs are connected to the same UIC pin (cf. UBoot source) */
 dev = sysbus_create_simple("ppc440-pcix-host", 0xc0ec0,
qdev_get_gpio_in(uic[1], 0));
-- 
2.30.9

[PATCH 00/13] PPC440 devices misc clean up

2023-07-03 Thread BALATON Zoltan

These are some small miscellaneous cleanups to PPC440-related device models,
which is all I have ready for now.

BALATON Zoltan (13):
  ppc440: Change ppc460ex_pcie_init() parameter type
  ppc440: Add cpu link property to PCIe controller model
  ppc440: Add a macro to shorten PCIe controller DCR registration
  ppc440: Rename local variable in dcr_read_pcie()
  ppc440: Stop using system io region for PCIe buses
  sam460ex: Remove address_space_mem local variable
  ppc440: Add busnum property to PCIe controller model
  ppc440: Remove ppc460ex_pcie_init legacy init function
  ppc4xx_pci: Rename QOM type name define
  ppc4xx_pci: Add define for ppc4xx-host-bridge type name
  ppc440_pcix: Rename QOM type define and move it to common header
  ppc440_pcix: Don't use iomem for regs
  ppc440_pcix: Stop using system io region for PCI bus

 hw/ppc/ppc440.h |   1 -
 hw/ppc/ppc440_bamboo.c  |   3 +-
 hw/ppc/ppc440_pcix.c|  27 +++---
 hw/ppc/ppc440_uc.c  | 190 +---
 hw/ppc/ppc4xx_pci.c |  10 +--
 hw/ppc/sam460ex.c   |  33 ---
 include/hw/ppc/ppc4xx.h |   5 +-
 7 files changed, 127 insertions(+), 142 deletions(-)

-- 
2.30.9

[PATCH 10/13] ppc4xx_pci: Add define for ppc4xx-host-bridge type name

2023-07-03 Thread BALATON Zoltan

Add a QOM type name define for ppc4xx-host-bridge in the common header
and replace direct use of the string name with the constant.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc440_pcix.c| 3 ++-
 hw/ppc/ppc4xx_pci.c | 4 ++--
 include/hw/ppc/ppc4xx.h | 1 +
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/ppc440_pcix.c b/hw/ppc/ppc440_pcix.c
index f10f93c533..dfec25ac83 100644
--- a/hw/ppc/ppc440_pcix.c
+++ b/hw/ppc/ppc440_pcix.c
@@ -495,7 +495,8 @@ static void ppc440_pcix_realize(DeviceState *dev, Error 
**errp)
  ppc440_pcix_map_irq, &s->irq, &s->busmem,
  get_system_io(), PCI_DEVFN(0, 0), 1, TYPE_PCI_BUS);
 
-s->dev = pci_create_simple(h->bus, PCI_DEVFN(0, 0), "ppc4xx-host-bridge");
+s->dev = pci_create_simple(h->bus, PCI_DEVFN(0, 0),
+   TYPE_PPC4xx_HOST_BRIDGE);
 
 memory_region_init(&s->bm, OBJECT(s), "bm-ppc440-pcix", UINT64_MAX);
 memory_region_add_subregion(&s->bm, 0x0, &s->busmem);
diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c
index fbdf8266d8..6652119008 100644
--- a/hw/ppc/ppc4xx_pci.c
+++ b/hw/ppc/ppc4xx_pci.c
@@ -333,7 +333,7 @@ static void ppc4xx_pcihost_realize(DeviceState *dev, Error 
**errp)
   TYPE_PCI_BUS);
 h->bus = b;
 
-pci_create_simple(b, 0, "ppc4xx-host-bridge");
+pci_create_simple(b, 0, TYPE_PPC4xx_HOST_BRIDGE);
 
 /* XXX split into 2 memory regions, one for config space, one for regs */
 memory_region_init(&s->container, OBJECT(s), "pci-container", 
PCI_ALL_SIZE);
@@ -367,7 +367,7 @@ static void ppc4xx_host_bridge_class_init(ObjectClass 
*klass, void *data)
 }
 
 static const TypeInfo ppc4xx_host_bridge_info = {
-.name  = "ppc4xx-host-bridge",
+.name  = TYPE_PPC4xx_HOST_BRIDGE,
 .parent= TYPE_PCI_DEVICE,
 .instance_size = sizeof(PCIDevice),
 .class_init= ppc4xx_host_bridge_class_init,
diff --git a/include/hw/ppc/ppc4xx.h b/include/hw/ppc/ppc4xx.h
index e053b9751b..766d575e86 100644
--- a/include/hw/ppc/ppc4xx.h
+++ b/include/hw/ppc/ppc4xx.h
@@ -29,6 +29,7 @@
 #include "exec/memory.h"
 #include "hw/sysbus.h"
 
+#define TYPE_PPC4xx_HOST_BRIDGE "ppc4xx-host-bridge"
 #define TYPE_PPC4xx_PCI_HOST "ppc4xx-pci-host"
 #define TYPE_PPC460EX_PCIE_HOST "ppc460ex-pcie-host"
 
-- 
2.30.9

[PATCH 05/13] ppc440: Stop using system io region for PCIe buses

2023-07-03 Thread BALATON Zoltan

Add separate memory regions for the mem and io spaces of the PCIe bus
to avoid different buses using the same system io region.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc440_uc.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c
index 38ee27f437..0c5d999878 100644
--- a/hw/ppc/ppc440_uc.c
+++ b/hw/ppc/ppc440_uc.c
@@ -776,6 +776,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(PPC460EXPCIEState, 
PPC460EX_PCIE_HOST)
 struct PPC460EXPCIEState {
 PCIExpressHost host;
 
+MemoryRegion busmem;
 MemoryRegion iomem;
 qemu_irq irq[4];
 int32_t dcrn_base;
@@ -1056,15 +1057,17 @@ static void ppc460ex_pcie_realize(DeviceState *dev, 
Error **errp)
 error_setg(errp, "invalid PCIe DCRN base");
 return;
 }
+snprintf(buf, sizeof(buf), "pcie%d-mem", id);
+memory_region_init(&s->busmem, OBJECT(s), buf, UINT64_MAX);
 snprintf(buf, sizeof(buf), "pcie%d-io", id);
-memory_region_init(&s->iomem, OBJECT(s), buf, UINT64_MAX);
+memory_region_init(&s->iomem, OBJECT(s), buf, 0x1);
 for (i = 0; i < 4; i++) {
 sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq[i]);
 }
 snprintf(buf, sizeof(buf), "pcie.%d", id);
 pci->bus = pci_register_root_bus(DEVICE(s), buf, ppc460ex_set_irq,
-pci_swizzle_map_irq_fn, s, &s->iomem,
-get_system_io(), 0, 4, TYPE_PCIE_BUS);
+pci_swizzle_map_irq_fn, s, &s->busmem,
+&s->iomem, 0, 4, TYPE_PCIE_BUS);
 ppc460ex_pcie_register_dcrs(s);
 }
 
-- 
2.30.9

[PATCH 09/13] ppc4xx_pci: Rename QOM type name define

2023-07-03 Thread BALATON Zoltan

Rename the TYPE_PPC4xx_PCI_HOST_BRIDGE define and its string value to
match each other and other similar types, and to avoid confusion with the
"ppc4xx-host-bridge" type defined in the same file.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc440_bamboo.c  | 3 +--
 hw/ppc/ppc4xx_pci.c | 6 +++---
 include/hw/ppc/ppc4xx.h | 2 +-
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c
index f061b8cf3b..45f409c838 100644
--- a/hw/ppc/ppc440_bamboo.c
+++ b/hw/ppc/ppc440_bamboo.c
@@ -205,8 +205,7 @@ static void bamboo_init(MachineState *machine)
 ppc4xx_sdram_ddr_enable(PPC4xx_SDRAM_DDR(dev));
 
 /* PCI */
-dev = sysbus_create_varargs(TYPE_PPC4xx_PCI_HOST_BRIDGE,
-PPC440EP_PCI_CONFIG,
+dev = sysbus_create_varargs(TYPE_PPC4xx_PCI_HOST, PPC440EP_PCI_CONFIG,
 qdev_get_gpio_in(uicdev, pci_irq_nrs[0]),
 qdev_get_gpio_in(uicdev, pci_irq_nrs[1]),
 qdev_get_gpio_in(uicdev, pci_irq_nrs[2]),
diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c
index 1d4a50fa7c..fbdf8266d8 100644
--- a/hw/ppc/ppc4xx_pci.c
+++ b/hw/ppc/ppc4xx_pci.c
@@ -46,7 +46,7 @@ struct PCITargetMap {
 uint32_t la;
 };
 
-OBJECT_DECLARE_SIMPLE_TYPE(PPC4xxPCIState, PPC4xx_PCI_HOST_BRIDGE)
+OBJECT_DECLARE_SIMPLE_TYPE(PPC4xxPCIState, PPC4xx_PCI_HOST)
 
 #define PPC4xx_PCI_NR_PMMS 3
 #define PPC4xx_PCI_NR_PTMS 2
@@ -321,7 +321,7 @@ static void ppc4xx_pcihost_realize(DeviceState *dev, Error 
**errp)
 int i;
 
 h = PCI_HOST_BRIDGE(dev);
-s = PPC4xx_PCI_HOST_BRIDGE(dev);
+s = PPC4xx_PCI_HOST(dev);
 
 for (i = 0; i < ARRAY_SIZE(s->irq); i++) {
 sysbus_init_irq(sbd, &s->irq[i]);
@@ -386,7 +386,7 @@ static void ppc4xx_pcihost_class_init(ObjectClass *klass, 
void *data)
 }
 
 static const TypeInfo ppc4xx_pcihost_info = {
-.name  = TYPE_PPC4xx_PCI_HOST_BRIDGE,
+.name  = TYPE_PPC4xx_PCI_HOST,
 .parent= TYPE_PCI_HOST_BRIDGE,
 .instance_size = sizeof(PPC4xxPCIState),
 .class_init= ppc4xx_pcihost_class_init,
diff --git a/include/hw/ppc/ppc4xx.h b/include/hw/ppc/ppc4xx.h
index 39ca602442..e053b9751b 100644
--- a/include/hw/ppc/ppc4xx.h
+++ b/include/hw/ppc/ppc4xx.h
@@ -29,7 +29,7 @@
 #include "exec/memory.h"
 #include "hw/sysbus.h"
 
-#define TYPE_PPC4xx_PCI_HOST_BRIDGE "ppc4xx-pcihost"
+#define TYPE_PPC4xx_PCI_HOST "ppc4xx-pci-host"
 #define TYPE_PPC460EX_PCIE_HOST "ppc460ex-pcie-host"
 
 /*
-- 
2.30.9

[PATCH 06/13] sam460ex: Remove address_space_mem local variable

2023-07-03 Thread BALATON Zoltan

Some places already use get_system_memory() directly, so replace the
remaining uses and drop the local variable.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/sam460ex.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c
index aaa8d2f4a5..f098226974 100644
--- a/hw/ppc/sam460ex.c
+++ b/hw/ppc/sam460ex.c
@@ -266,7 +266,6 @@ static void main_cpu_reset(void *opaque)
 
 static void sam460ex_init(MachineState *machine)
 {
-MemoryRegion *address_space_mem = get_system_memory();
 MemoryRegion *isa = g_new(MemoryRegion, 1);
 MemoryRegion *l2cache_ram = g_new(MemoryRegion, 1);
 DeviceState *uic[4];
@@ -406,7 +405,8 @@ static void sam460ex_init(MachineState *machine)
 /* FIXME: remove this after fixing l2sram mapping in ppc440_uc.c? */
 memory_region_init_ram(l2cache_ram, NULL, "ppc440.l2cache_ram", 256 * KiB,
&error_abort);
-memory_region_add_subregion(address_space_mem, 0x4LL, l2cache_ram);
+memory_region_add_subregion(get_system_memory(), 0x4LL,
+l2cache_ram);
 
 /* USB */
 sysbus_create_simple(TYPE_PPC4xx_EHCI, 0x4bffd0400,
@@ -444,13 +444,13 @@ static void sam460ex_init(MachineState *machine)
 /* SoC has 4 UARTs
  * but board has only one wired and two are present in fdt */
 if (serial_hd(0) != NULL) {
-serial_mm_init(address_space_mem, 0x4ef600300, 0,
+serial_mm_init(get_system_memory(), 0x4ef600300, 0,
qdev_get_gpio_in(uic[1], 1),
PPC_SERIAL_MM_BAUDBASE, serial_hd(0),
DEVICE_BIG_ENDIAN);
 }
 if (serial_hd(1) != NULL) {
-serial_mm_init(address_space_mem, 0x4ef600400, 0,
+serial_mm_init(get_system_memory(), 0x4ef600400, 0,
qdev_get_gpio_in(uic[0], 1),
PPC_SERIAL_MM_BAUDBASE, serial_hd(1),
DEVICE_BIG_ENDIAN);
-- 
2.30.9

[PATCH 08/13] ppc440: Remove ppc460ex_pcie_init legacy init function

2023-07-03 Thread BALATON Zoltan

After previous changes we can now remove the legacy init function and
move the device creation to board code.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc440.h |  1 -
 hw/ppc/ppc440_uc.c  | 21 -
 hw/ppc/sam460ex.c   | 17 -
 include/hw/ppc/ppc4xx.h |  1 +
 4 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/hw/ppc/ppc440.h b/hw/ppc/ppc440.h
index ae42bcf0c8..909373fb38 100644
--- a/hw/ppc/ppc440.h
+++ b/hw/ppc/ppc440.h
@@ -18,6 +18,5 @@ void ppc4xx_cpr_init(CPUPPCState *env);
 void ppc4xx_sdr_init(CPUPPCState *env);
 void ppc4xx_ahb_init(CPUPPCState *env);
 void ppc4xx_dma_init(CPUPPCState *env, int dcr_base);
-void ppc460ex_pcie_init(PowerPCCPU *cpu);
 
 #endif /* PPC440_H */
diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c
index 61782a5c1e..545f36edce 100644
--- a/hw/ppc/ppc440_uc.c
+++ b/hw/ppc/ppc440_uc.c
@@ -770,7 +770,6 @@ void ppc4xx_dma_init(CPUPPCState *env, int dcr_base)
  */
 #include "hw/pci/pcie_host.h"
 
-#define TYPE_PPC460EX_PCIE_HOST "ppc460ex-pcie-host"
 OBJECT_DECLARE_SIMPLE_TYPE(PPC460EXPCIEState, PPC460EX_PCIE_HOST)
 
 struct PPC460EXPCIEState {
@@ -799,9 +798,6 @@ struct PPC460EXPCIEState {
 uint32_t cfg;
 };
 
-#define DCRN_PCIE0_BASE 0x100
-#define DCRN_PCIE1_BASE 0x120
-
 enum {
 PEGPL_CFGBAH = 0x0,
 PEGPL_CFGBAL,
@@ -1096,20 +1092,3 @@ static void ppc460ex_pcie_register(void)
 }
 
 type_init(ppc460ex_pcie_register)
-
-void ppc460ex_pcie_init(PowerPCCPU *cpu)
-{
-DeviceState *dev;
-
-dev = qdev_new(TYPE_PPC460EX_PCIE_HOST);
-qdev_prop_set_int32(dev, "busnum", 0);
-qdev_prop_set_int32(dev, "dcrn-base", DCRN_PCIE0_BASE);
-object_property_set_link(OBJECT(dev), "cpu", OBJECT(cpu), &error_abort);
-sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
-
-dev = qdev_new(TYPE_PPC460EX_PCIE_HOST);
-qdev_prop_set_int32(dev, "busnum", 1);
-qdev_prop_set_int32(dev, "dcrn-base", DCRN_PCIE1_BASE);
-object_property_set_link(OBJECT(dev), "cpu", OBJECT(cpu), &error_abort);
-sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
-}
diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c
index f098226974..d446cfc37b 100644
--- a/hw/ppc/sam460ex.c
+++ b/hw/ppc/sam460ex.c
@@ -45,6 +45,9 @@
 /* dd bs=1 skip=$(($(stat -c '%s' updater/updater-460) - 0x8)) \
  if=updater/updater-460 of=u-boot-sam460-20100605.bin */
 
+#define PCIE0_DCRN_BASE 0x100
+#define PCIE1_DCRN_BASE 0x120
+
 /* from Sam460 U-Boot include/configs/Sam460ex.h */
 #define FLASH_BASE 0xfff0
 #define FLASH_BASE_H   0x4
@@ -421,8 +424,20 @@ static void sam460ex_init(MachineState *machine)
 usb_create_simple(usb_bus_find(-1), "usb-kbd");
 usb_create_simple(usb_bus_find(-1), "usb-mouse");
 
+/* PCIe buses */
+dev = qdev_new(TYPE_PPC460EX_PCIE_HOST);
+qdev_prop_set_int32(dev, "busnum", 0);
+qdev_prop_set_int32(dev, "dcrn-base", PCIE0_DCRN_BASE);
+object_property_set_link(OBJECT(dev), "cpu", OBJECT(cpu), &error_abort);
+sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+
+dev = qdev_new(TYPE_PPC460EX_PCIE_HOST);
+qdev_prop_set_int32(dev, "busnum", 1);
+qdev_prop_set_int32(dev, "dcrn-base", PCIE1_DCRN_BASE);
+object_property_set_link(OBJECT(dev), "cpu", OBJECT(cpu), &error_abort);
+sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+
 /* PCI bus */
-ppc460ex_pcie_init(cpu);
 /* All PCI irqs are connected to the same UIC pin (cf. UBoot source) */
 dev = sysbus_create_simple("ppc440-pcix-host", 0xc0ec0,
qdev_get_gpio_in(uic[1], 0));
diff --git a/include/hw/ppc/ppc4xx.h b/include/hw/ppc/ppc4xx.h
index f8c86e09ec..39ca602442 100644
--- a/include/hw/ppc/ppc4xx.h
+++ b/include/hw/ppc/ppc4xx.h
@@ -30,6 +30,7 @@
 #include "hw/sysbus.h"
 
 #define TYPE_PPC4xx_PCI_HOST_BRIDGE "ppc4xx-pcihost"
+#define TYPE_PPC460EX_PCIE_HOST "ppc460ex-pcie-host"
 
 /*
  * Generic DCR device
-- 
2.30.9

[PATCH 11/13] ppc440_pcix: Rename QOM type define and move it to common header

2023-07-03 Thread BALATON Zoltan

Rename TYPE_PPC440_PCIX_HOST_BRIDGE to better match its string value,
move it to common header and use it also in sam460ex to replace hard
coded type name.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc440_pcix.c| 9 -
 hw/ppc/sam460ex.c   | 2 +-
 include/hw/ppc/ppc4xx.h | 1 +
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/ppc/ppc440_pcix.c b/hw/ppc/ppc440_pcix.c
index dfec25ac83..adfecf1e76 100644
--- a/hw/ppc/ppc440_pcix.c
+++ b/hw/ppc/ppc440_pcix.c
@@ -44,8 +44,7 @@ struct PLBInMap {
 MemoryRegion mr;
 };
 
-#define TYPE_PPC440_PCIX_HOST_BRIDGE "ppc440-pcix-host"
-OBJECT_DECLARE_SIMPLE_TYPE(PPC440PCIXState, PPC440_PCIX_HOST_BRIDGE)
+OBJECT_DECLARE_SIMPLE_TYPE(PPC440PCIXState, PPC440_PCIX_HOST)
 
 #define PPC440_PCIX_NR_POMS 3
 #define PPC440_PCIX_NR_PIMS 3
@@ -397,7 +396,7 @@ static const MemoryRegionOps pci_reg_ops = {
 
 static void ppc440_pcix_reset(DeviceState *dev)
 {
-struct PPC440PCIXState *s = PPC440_PCIX_HOST_BRIDGE(dev);
+struct PPC440PCIXState *s = PPC440_PCIX_HOST(dev);
 int i;
 
 for (i = 0; i < PPC440_PCIX_NR_POMS; i++) {
@@ -487,7 +486,7 @@ static void ppc440_pcix_realize(DeviceState *dev, Error 
**errp)
 PCIHostState *h;
 
 h = PCI_HOST_BRIDGE(dev);
-s = PPC440_PCIX_HOST_BRIDGE(dev);
+s = PPC440_PCIX_HOST(dev);
 
 sysbus_init_irq(sbd, &s->irq);
 memory_region_init(&s->busmem, OBJECT(dev), "pci bus memory", UINT64_MAX);
@@ -525,7 +524,7 @@ static void ppc440_pcix_class_init(ObjectClass *klass, void 
*data)
 }
 
 static const TypeInfo ppc440_pcix_info = {
-.name  = TYPE_PPC440_PCIX_HOST_BRIDGE,
+.name  = TYPE_PPC440_PCIX_HOST,
 .parent= TYPE_PCI_HOST_BRIDGE,
 .instance_size = sizeof(PPC440PCIXState),
 .class_init= ppc440_pcix_class_init,
diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c
index d446cfc37b..8d0e551d14 100644
--- a/hw/ppc/sam460ex.c
+++ b/hw/ppc/sam460ex.c
@@ -439,7 +439,7 @@ static void sam460ex_init(MachineState *machine)
 
 /* PCI bus */
 /* All PCI irqs are connected to the same UIC pin (cf. UBoot source) */
-dev = sysbus_create_simple("ppc440-pcix-host", 0xc0ec0,
+dev = sysbus_create_simple(TYPE_PPC440_PCIX_HOST, 0xc0ec0,
qdev_get_gpio_in(uic[1], 0));
 pci_bus = PCI_BUS(qdev_get_child_bus(dev, "pci.0"));
 
diff --git a/include/hw/ppc/ppc4xx.h b/include/hw/ppc/ppc4xx.h
index 766d575e86..ea7740239b 100644
--- a/include/hw/ppc/ppc4xx.h
+++ b/include/hw/ppc/ppc4xx.h
@@ -31,6 +31,7 @@
 
 #define TYPE_PPC4xx_HOST_BRIDGE "ppc4xx-host-bridge"
 #define TYPE_PPC4xx_PCI_HOST "ppc4xx-pci-host"
+#define TYPE_PPC440_PCIX_HOST "ppc440-pcix-host"
 #define TYPE_PPC460EX_PCIE_HOST "ppc460ex-pcie-host"
 
 /*
-- 
2.30.9

[PATCH 07/13] ppc440: Add busnum property to PCIe controller model

2023-07-03 Thread BALATON Zoltan

Instead of guessing controller number from dcrn_base add a property so
the device does not need knowledge about where it is used.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc440_uc.c | 25 +++--
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c
index 0c5d999878..61782a5c1e 100644
--- a/hw/ppc/ppc440_uc.c
+++ b/hw/ppc/ppc440_uc.c
@@ -779,6 +779,7 @@ struct PPC460EXPCIEState {
 MemoryRegion busmem;
 MemoryRegion iomem;
 qemu_irq irq[4];
+int32_t num;
 int32_t dcrn_base;
 PowerPCCPU *cpu;
 
@@ -1039,32 +1040,25 @@ static void ppc460ex_pcie_realize(DeviceState *dev, 
Error **errp)
 {
 PPC460EXPCIEState *s = PPC460EX_PCIE_HOST(dev);
 PCIHostState *pci = PCI_HOST_BRIDGE(dev);
-int i, id;
-char buf[16];
+int i;
+char buf[20];
 
 if (!s->cpu) {
 error_setg(errp, "cpu link property must be set");
 return;
 }
-switch (s->dcrn_base) {
-case DCRN_PCIE0_BASE:
-id = 0;
-break;
-case DCRN_PCIE1_BASE:
-id = 1;
-break;
-default:
-error_setg(errp, "invalid PCIe DCRN base");
+if (s->num < 0 || s->dcrn_base < 0) {
+error_setg(errp, "busnum and dcrn-base properties must be set");
 return;
 }
-snprintf(buf, sizeof(buf), "pcie%d-mem", id);
+snprintf(buf, sizeof(buf), "pcie%d-mem", s->num);
 memory_region_init(&s->busmem, OBJECT(s), buf, UINT64_MAX);
-snprintf(buf, sizeof(buf), "pcie%d-io", id);
+snprintf(buf, sizeof(buf), "pcie%d-io", s->num);
 memory_region_init(&s->iomem, OBJECT(s), buf, 0x1);
 for (i = 0; i < 4; i++) {
 sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq[i]);
 }
-snprintf(buf, sizeof(buf), "pcie.%d", id);
+snprintf(buf, sizeof(buf), "pcie.%d", s->num);
 pci->bus = pci_register_root_bus(DEVICE(s), buf, ppc460ex_set_irq,
 pci_swizzle_map_irq_fn, s, &s->busmem,
 &s->iomem, 0, 4, TYPE_PCIE_BUS);
@@ -1072,6 +1066,7 @@ static void ppc460ex_pcie_realize(DeviceState *dev, Error 
**errp)
 }
 
 static Property ppc460ex_pcie_props[] = {
+DEFINE_PROP_INT32("busnum", PPC460EXPCIEState, num, -1),
 DEFINE_PROP_INT32("dcrn-base", PPC460EXPCIEState, dcrn_base, -1),
 DEFINE_PROP_LINK("cpu", PPC460EXPCIEState, cpu, TYPE_POWERPC_CPU,
  PowerPCCPU *),
@@ -1107,11 +1102,13 @@ void ppc460ex_pcie_init(PowerPCCPU *cpu)
 DeviceState *dev;
 
 dev = qdev_new(TYPE_PPC460EX_PCIE_HOST);
+qdev_prop_set_int32(dev, "busnum", 0);
 qdev_prop_set_int32(dev, "dcrn-base", DCRN_PCIE0_BASE);
 object_property_set_link(OBJECT(dev), "cpu", OBJECT(cpu), &error_abort);
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
 
 dev = qdev_new(TYPE_PPC460EX_PCIE_HOST);
+qdev_prop_set_int32(dev, "busnum", 1);
 qdev_prop_set_int32(dev, "dcrn-base", DCRN_PCIE1_BASE);
 object_property_set_link(OBJECT(dev), "cpu", OBJECT(cpu), &error_abort);
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
-- 
2.30.9

[PATCH 02/13] ppc440: Add cpu link property to PCIe controller model

2023-07-03 Thread BALATON Zoltan

The PCIe controller model uses PPC DCRs but cannot be modeled with
TYPE_PPC4xx_DCR_DEVICE as it derives from TYPE_PCIE_HOST_BRIDGE. Add a
cpu link property to it similar to other DCR devices to allow
registering DCRs from the device model.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc440_uc.c | 114 -
 1 file changed, 62 insertions(+), 52 deletions(-)

diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c
index 8eb985d714..b26c0cee1b 100644
--- a/hw/ppc/ppc440_uc.c
+++ b/hw/ppc/ppc440_uc.c
@@ -779,6 +779,7 @@ struct PPC460EXPCIEState {
 MemoryRegion iomem;
 qemu_irq irq[4];
 int32_t dcrn_base;
+PowerPCCPU *cpu;
 
 uint64_t cfg_base;
 uint32_t cfg_mask;
@@ -1001,6 +1002,58 @@ static void ppc460ex_set_irq(void *opaque, int irq_num, 
int level)
qemu_set_irq(s->irq[irq_num], level);
 }
 
+static void ppc460ex_pcie_register_dcrs(PPC460EXPCIEState *s)
+{
+CPUPPCState *env = &s->cpu->env;
+
+ppc_dcr_register(env, s->dcrn_base + PEGPL_CFGBAH, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_CFGBAL, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_CFGMSK, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_MSGBAH, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_MSGBAL, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_MSGMSK, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1BAH, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1BAL, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1MSKH, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1MSKL, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2BAH, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2BAL, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2MSKH, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2MSKL, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3BAH, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3BAL, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3MSKH, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3MSKL, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_REGBAH, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_REGBAL, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_REGMSK, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_SPECIAL, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+ppc_dcr_register(env, s->dcrn_base + PEGPL_CFG, s,
+ &dcr_read_pcie, &dcr_write_pcie);
+}
+
 static void ppc460ex_pcie_realize(DeviceState *dev, Error **errp)
 {
 PPC460EXPCIEState *s = PPC460EX_PCIE_HOST(dev);
@@ -1008,6 +1061,10 @@ static void ppc460ex_pcie_realize(DeviceState *dev, 
Error **errp)
 int i, id;
 char buf[16];
 
+if (!s->cpu) {
+error_setg(errp, "cpu link property must be set");
+return;
+}
 switch (s->dcrn_base) {
 case DCRN_PCIE0_BASE:
 id = 0;
@@ -1028,10 +1085,13 @@ static void ppc460ex_pcie_realize(DeviceState *dev, 
Error **errp)
 pci->bus = pci_register_root_bus(DEVICE(s), buf, ppc460ex_set_irq,
 pci_swizzle_map_irq_fn, s, &s->iomem,
 get_system_io(), 0, 4, TYPE_PCIE_BUS);
+ppc460ex_pcie_register_dcrs(s);
 }
 
 static Property ppc460ex_pcie_props[] = {
 DEFINE_PROP_INT32("dcrn-base", PPC460EXPCIEState, dcrn_base, -1),
+DEFINE_PROP_LINK("cpu", PPC460EXPCIEState, cpu, TYPE_POWERPC_CPU,
+ PowerPCCPU *),
 DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1059,67 +1119,17 @@ static void ppc460ex_pcie_register(void)
 
 type_init(ppc460ex_pcie_register)
 
-static void ppc460ex_pcie_register_dcrs(PPC460EXPCIEState *s, CPUPPCState *env)
-{
-ppc_dcr_register(env, s->dcrn_base + PEGPL_CFGBAH, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_regis

[PATCH 04/13] ppc440: Rename local variable in dcr_read_pcie()

2023-07-03 Thread BALATON Zoltan

Rename local variable storing state struct in dcr_read_pcie() for
brevity and consistency with other functions.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc440_uc.c | 50 +++---
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c
index db83a0dec8..38ee27f437 100644
--- a/hw/ppc/ppc440_uc.c
+++ b/hw/ppc/ppc440_uc.c
@@ -828,78 +828,78 @@ enum {
 
 static uint32_t dcr_read_pcie(void *opaque, int dcrn)
 {
-PPC460EXPCIEState *state = opaque;
+PPC460EXPCIEState *s = opaque;
 uint32_t ret = 0;
 
-switch (dcrn - state->dcrn_base) {
+switch (dcrn - s->dcrn_base) {
 case PEGPL_CFGBAH:
-ret = state->cfg_base >> 32;
+ret = s->cfg_base >> 32;
 break;
 case PEGPL_CFGBAL:
-ret = state->cfg_base;
+ret = s->cfg_base;
 break;
 case PEGPL_CFGMSK:
-ret = state->cfg_mask;
+ret = s->cfg_mask;
 break;
 case PEGPL_MSGBAH:
-ret = state->msg_base >> 32;
+ret = s->msg_base >> 32;
 break;
 case PEGPL_MSGBAL:
-ret = state->msg_base;
+ret = s->msg_base;
 break;
 case PEGPL_MSGMSK:
-ret = state->msg_mask;
+ret = s->msg_mask;
 break;
 case PEGPL_OMR1BAH:
-ret = state->omr1_base >> 32;
+ret = s->omr1_base >> 32;
 break;
 case PEGPL_OMR1BAL:
-ret = state->omr1_base;
+ret = s->omr1_base;
 break;
 case PEGPL_OMR1MSKH:
-ret = state->omr1_mask >> 32;
+ret = s->omr1_mask >> 32;
 break;
 case PEGPL_OMR1MSKL:
-ret = state->omr1_mask;
+ret = s->omr1_mask;
 break;
 case PEGPL_OMR2BAH:
-ret = state->omr2_base >> 32;
+ret = s->omr2_base >> 32;
 break;
 case PEGPL_OMR2BAL:
-ret = state->omr2_base;
+ret = s->omr2_base;
 break;
 case PEGPL_OMR2MSKH:
-ret = state->omr2_mask >> 32;
+ret = s->omr2_mask >> 32;
 break;
 case PEGPL_OMR2MSKL:
-ret = state->omr3_mask;
+ret = s->omr3_mask;
 break;
 case PEGPL_OMR3BAH:
-ret = state->omr3_base >> 32;
+ret = s->omr3_base >> 32;
 break;
 case PEGPL_OMR3BAL:
-ret = state->omr3_base;
+ret = s->omr3_base;
 break;
 case PEGPL_OMR3MSKH:
-ret = state->omr3_mask >> 32;
+ret = s->omr3_mask >> 32;
 break;
 case PEGPL_OMR3MSKL:
-ret = state->omr3_mask;
+ret = s->omr3_mask;
 break;
 case PEGPL_REGBAH:
-ret = state->reg_base >> 32;
+ret = s->reg_base >> 32;
 break;
 case PEGPL_REGBAL:
-ret = state->reg_base;
+ret = s->reg_base;
 break;
 case PEGPL_REGMSK:
-ret = state->reg_mask;
+ret = s->reg_mask;
 break;
 case PEGPL_SPECIAL:
-ret = state->special;
+ret = s->special;
 break;
 case PEGPL_CFG:
-ret = state->cfg;
+ret = s->cfg;
 break;
 }
 
-- 
2.30.9

[PATCH 12/13] ppc440_pcix: Don't use iomem for regs

2023-07-03 Thread BALATON Zoltan

The iomem memory region is better used for the PCI IO space but is
currently used for registers. Stop using it for that to allow this to
be cleaned up in the next patch.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc440_pcix.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/ppc440_pcix.c b/hw/ppc/ppc440_pcix.c
index adfecf1e76..ee2dc44f67 100644
--- a/hw/ppc/ppc440_pcix.c
+++ b/hw/ppc/ppc440_pcix.c
@@ -484,6 +484,7 @@ static void ppc440_pcix_realize(DeviceState *dev, Error 
**errp)
 SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
 PPC440PCIXState *s;
 PCIHostState *h;
+MemoryRegion *regs = g_new(MemoryRegion, 1);
 
 h = PCI_HOST_BRIDGE(dev);
 s = PPC440_PCIX_HOST(dev);
@@ -507,11 +508,11 @@ static void ppc440_pcix_realize(DeviceState *dev, Error 
**errp)
   h, "pci-conf-idx", 4);
 memory_region_init_io(&h->data_mem, OBJECT(s), &pci_host_data_le_ops,
   h, "pci-conf-data", 4);
-memory_region_init_io(&s->iomem, OBJECT(s), &pci_reg_ops, s,
-  "pci.reg", PPC440_REG_SIZE);
+memory_region_init_io(regs, OBJECT(s), &pci_reg_ops, s, "pci-reg",
+  PPC440_REG_SIZE);
 memory_region_add_subregion(&s->container, PCIC0_CFGADDR, &h->conf_mem);
 memory_region_add_subregion(&s->container, PCIC0_CFGDATA, &h->data_mem);
-memory_region_add_subregion(&s->container, PPC440_REG_BASE, &s->iomem);
+memory_region_add_subregion(&s->container, PPC440_REG_BASE, regs);
 sysbus_init_mmio(sbd, &s->container);
 }
 
-- 
2.30.9

[PATCH 03/13] ppc440: Add a macro to shorten PCIe controller DCR registration

2023-07-03 Thread BALATON Zoltan

It is more readable to wrap the complex call to ppc_dcr_register in a
macro when needed repeatedly.

Signed-off-by: BALATON Zoltan 
---
 hw/ppc/ppc440_uc.c | 76 +-
 1 file changed, 28 insertions(+), 48 deletions(-)

diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c
index b26c0cee1b..db83a0dec8 100644
--- a/hw/ppc/ppc440_uc.c
+++ b/hw/ppc/ppc440_uc.c
@@ -1002,56 +1002,36 @@ static void ppc460ex_set_irq(void *opaque, int irq_num, 
int level)
qemu_set_irq(s->irq[irq_num], level);
 }
 
+#define PPC440_PCIE_DCR(s, dcrn) \
+ppc_dcr_register(&(s)->cpu->env, (s)->dcrn_base + (dcrn), s, \
+ &dcr_read_pcie, &dcr_write_pcie)
+
+
 static void ppc460ex_pcie_register_dcrs(PPC460EXPCIEState *s)
 {
-CPUPPCState *env = &s->cpu->env;
-
-ppc_dcr_register(env, s->dcrn_base + PEGPL_CFGBAH, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_CFGBAL, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_CFGMSK, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_MSGBAH, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_MSGBAL, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_MSGMSK, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1BAH, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1BAL, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1MSKH, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1MSKL, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2BAH, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2BAL, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2MSKH, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2MSKL, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3BAH, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3BAL, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3MSKH, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3MSKL, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_REGBAH, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_REGBAL, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_REGMSK, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_SPECIAL, s,
- &dcr_read_pcie, &dcr_write_pcie);
-ppc_dcr_register(env, s->dcrn_base + PEGPL_CFG, s,
- &dcr_read_pcie, &dcr_write_pcie);
+PPC440_PCIE_DCR(s, PEGPL_CFGBAH);
+PPC440_PCIE_DCR(s, PEGPL_CFGBAL);
+PPC440_PCIE_DCR(s, PEGPL_CFGMSK);
+PPC440_PCIE_DCR(s, PEGPL_MSGBAH);
+PPC440_PCIE_DCR(s, PEGPL_MSGBAL);
+PPC440_PCIE_DCR(s, PEGPL_MSGMSK);
+PPC440_PCIE_DCR(s, PEGPL_OMR1BAH);
+PPC440_PCIE_DCR(s, PEGPL_OMR1BAL);
+PPC440_PCIE_DCR(s, PEGPL_OMR1MSKH);
+PPC440_PCIE_DCR(s, PEGPL_OMR1MSKL);
+PPC440_PCIE_DCR(s, PEGPL_OMR2BAH);
+PPC440_PCIE_DCR(s, PEGPL_OMR2BAL);
+PPC440_PCIE_DCR(s, PEGPL_OMR2MSKH);
+PPC440_PCIE_DCR(s, PEGPL_OMR2MSKL);
+PPC440_PCIE_DCR(s, PEGPL_OMR3BAH);
+PPC440_PCIE_DCR(s, PEGPL_OMR3BAL);
+PPC440_PCIE_DCR(s, PEGPL_OMR3MSKH);
+PPC440_PCIE_DCR(s, PEGPL_OMR3MSKL);
+PPC440_PCIE_DCR(s, PEGPL_REGBAH);
+PPC440_PCIE_DCR(s, PEGPL_REGBAL);
+PPC440_PCIE_DCR(s, PEGPL_REGMSK);
+PPC440_PCIE_DCR(s, PEGPL_SPECIAL);
+PPC440_PCIE_DCR(s, PEGPL_CFG);
 }
 
 static void ppc460ex_pcie_realize(DeviceState *dev, Error **errp)
-- 
2.30.9

Re: [PATCH RESEND v2 1/2] target/i386/kvm: introduce 'pmu-cap-disabled' to set KVM_PMU_CAP_DISABLE

2023-07-03 Thread Dongli Zhang

Hi Like,

On 7/2/23 06:41, Like Xu wrote:
> On Wed, Jun 21, 2023 at 9:39 AM Dongli Zhang  wrote:
>>
>> The "perf stat" at the VM side still works even we set "-cpu host,-pmu" in
>> the QEMU command line. That is, neither "-cpu host,-pmu" nor "-cpu EPYC"
>> could disable the pmu virtualization in an AMD environment.
>>
>> We still see below at VM kernel side ...
>>
>> [0.510611] Performance Events: Fam17h+ core perfctr, AMD PMU driver.
>>
>> ... although we expect something like below.
>>
>> [0.596381] Performance Events: PMU not available due to virtualization, 
>> using software events only.
>> [0.600972] NMI watchdog: Perf NMI watchdog permanently disabled
>>
>> This is because the AMD pmu (v1) does not rely on cpuid to decide if the
>> pmu virtualization is supported.
>>
>> We introduce a new property 'pmu-cap-disabled' for KVM accel to set
>> KVM_PMU_CAP_DISABLE if KVM_CAP_PMU_CAPABILITY is supported. Only x86 host
>> is supported because currently KVM uses KVM_CAP_PMU_CAPABILITY only for
>> x86.
> 
> We may check cpu->enable_pmu when creating the first CPU or a BSP one
> (before it gets running) and then choose whether to disable guest pmu using
> vm ioctl KVM_CAP_PMU_CAPABILITY. Introducing a new property is not too
> acceptable if there are other options.

In the v1 of the implementation, we have implemented something similar: not
based on the cpu_index (or BSP), but to introduce a helper before creating the
KVM vcpu to let the further implementation decide. We did the
KVM_CAP_PMU_CAPABILITY in that helper once.

[PATCH 1/3] kvm: introduce a helper before creating the 1st vcpu
https://lore.kernel.org/all/20221119122901.2469-2-dongli.zh...@oracle.com/

[PATCH 2/3] i386: kvm: disable KVM_CAP_PMU_CAPABILITY if "pmu" is disabled
https://lore.kernel.org/all/20221119122901.2469-3-dongli.zh...@oracle.com/


The below was the suggestion from Greg Kurz about using a per-vCPU property to
control a per-VM cap:

"It doesn't seem conceptually correct to configure VM level stuff out of
a vCPU property, which could theoretically be different for each vCPU,
even if this isn't the case with the current code base.

Maybe consider controlling PMU with a machine property and this
could be done in kvm_arch_init() like other VM level stuff ?"

Would you mind commenting on that?

Thank you very much!

Dongli Zhang

> 
>>
>> Cc: Joe Jin 
>> Cc: Like Xu 
>> Signed-off-by: Dongli Zhang 
>> ---
>> Changed since v1:
>> - In version 1 we did not introduce the new property. We ioctl
>>   KVM_PMU_CAP_DISABLE only before the creation of the 1st vcpu. We had
>>   introduced a helpfer function to do this job before creating the 1st
>>   KVM vcpu in v1.
>>
>>  accel/kvm/kvm-all.c  |  1 +
>>  include/sysemu/kvm_int.h |  1 +
>>  qemu-options.hx  |  7 ++
>>  target/i386/kvm/kvm.c| 46 
>>  4 files changed, 55 insertions(+)
>>
>> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
>> index 7679f397ae..238098e991 100644
>> --- a/accel/kvm/kvm-all.c
>> +++ b/accel/kvm/kvm-all.c
>> @@ -3763,6 +3763,7 @@ static void kvm_accel_instance_init(Object *obj)
>>  s->xen_version = 0;
>>  s->xen_gnttab_max_frames = 64;
>>  s->xen_evtchn_max_pirq = 256;
>> +s->pmu_cap_disabled = false;
>>  }
>>
>>  /**
>> diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
>> index 511b42bde5..cbbe08ec54 100644
>> --- a/include/sysemu/kvm_int.h
>> +++ b/include/sysemu/kvm_int.h
>> @@ -123,6 +123,7 @@ struct KVMState
>>  uint32_t xen_caps;
>>  uint16_t xen_gnttab_max_frames;
>>  uint16_t xen_evtchn_max_pirq;
>> +bool pmu_cap_disabled;
>>  };
>>
>>  void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index b57489d7ca..1976c0ca3e 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -187,6 +187,7 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel,
>>  "tb-size=n (TCG translation block cache size)\n"
>>  "dirty-ring-size=n (KVM dirty ring GFN count, default 
>> 0)\n"
>>  "
>> notify-vmexit=run|internal-error|disable,notify-window=n (enable notify VM 
>> exit and set notify window, x86 only)\n"
>> +"pmu-cap-disabled=true|false (disable 
>> KVM_CAP_PMU_CAPABILITY, x86 only, default false)\n"
>>  "thread=single|multi (enable multi-threaded TCG)\n", 
>> QEMU_ARCH_ALL)
>>  SRST
>>  ``-accel name[,prop=value[,...]]``
>> @@ -254,6 +255,12 @@ SRST
>>  open up for a specified of time (i.e. notify-window).
>>  Default: notify-vmexit=run,notify-window=0.
>>
>> +``pmu-cap-disabled=true|false``
>> +When the KVM accelerator is used, it controls whether to disable the
>> +KVM_CAP_PMU_CAPABILITY via KVM_PMU_CAP_DISABLE. When disabled, the
>> +PMU virtualization is disabled at the KVM module side. This is for
>> +x86 host only.
>> +
>>  ERST
>>
>>  DEF("smp

RE: [PATCH] Hexagon: move GETPC() calls to top level helpers

2023-07-03 Thread Brian Cain



> -Original Message-
> From: Matheus Tavares Bernardino 
> Sent: Monday, July 3, 2023 4:50 PM
> To: qemu-devel@nongnu.org
> Cc: Brian Cain ; Marco Liebel (QUIC)
> ; ltaylorsimp...@gmail.com
> Subject: [PATCH] Hexagon: move GETPC() calls to top level helpers
> 
> As docs/devel/loads-stores.rst states:
> 
>   ``GETPC()`` should be used with great care: calling
>   it in other functions that are *not* the top level
>   ``HELPER(foo)`` will cause unexpected behavior. Instead, the
>   value of ``GETPC()`` should be read from the helper and passed
>   if needed to the functions that the helper calls.
> 
> Let's fix the GETPC() usage in Hexagon, making sure it's always called
> from top level helpers and passed down to the places where it's
> needed. There are two snippets where that is not currently the case:
> 
> - probe_store(), which is only called from two helpers, so it's easy to
>   move GETPC() up.
> 
> - mem_load*() functions, which are also called directly from helpers,
>   but through the MEM_LOAD*() set of macros. Note that this are only
>   used when compiling with --disable-hexagon-idef-parser.
> 
>   In this case, we also take this opportunity to simplify the code,
>   unifying the mem_load*() functions.
> 
> Signed-off-by: Matheus Tavares Bernardino 
> ---
>  target/hexagon/macros.h| 22 ++---
>  target/hexagon/op_helper.h | 11 ++---
>  target/hexagon/op_helper.c | 49 +++---
>  3 files changed, 25 insertions(+), 57 deletions(-)
> 
> diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
> index 5451b061ee..efb8013912 100644
> --- a/target/hexagon/macros.h
> +++ b/target/hexagon/macros.h
> @@ -173,14 +173,20 @@
>  #define MEM_STORE8(VA, DATA, SLOT) \
>  MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
>  #else
> -#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD2s(VA) ((int16_t)mem_load2(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD2u(VA) ((uint16_t)mem_load2(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD4s(VA) ((int32_t)mem_load4(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD4u(VA) ((uint32_t)mem_load4(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD8s(VA) ((int64_t)mem_load8(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD8u(VA) ((uint64_t)mem_load8(env, pkt_has_store_s1, slot,
> VA))
> +
> +#define MEM_LOADn(SIZE, VA) ({ \
> +check_noshuf(env, pkt_has_store_s1, slot, VA, SIZE); \
> +cpu_ldub_data_ra(env, VA, GETPC()); \
> +})
> +
> +#define MEM_LOAD1s(VA) ((int8_t)MEM_LOADn(1, VA))
> +#define MEM_LOAD1u(VA) ((uint8_t)MEM_LOADn(1, VA))
> +#define MEM_LOAD2s(VA) ((int16_t)MEM_LOADn(2, VA))
> +#define MEM_LOAD2u(VA) ((uint16_t)MEM_LOADn(2, VA))
> +#define MEM_LOAD4s(VA) ((int32_t)MEM_LOADn(4, VA))
> +#define MEM_LOAD4u(VA) ((uint32_t)MEM_LOADn(4, VA))
> +#define MEM_LOAD8s(VA) ((int64_t)MEM_LOADn(8, VA))
> +#define MEM_LOAD8u(VA) ((uint64_t)MEM_LOADn(8, VA))
> 
>  #define MEM_STORE1(VA, DATA, SLOT) log_store32(env, VA, DATA, 1, SLOT)
>  #define MEM_STORE2(VA, DATA, SLOT) log_store32(env, VA, DATA, 2, SLOT)
> diff --git a/target/hexagon/op_helper.h b/target/hexagon/op_helper.h
> index 8f3764d15e..845c3d197e 100644
> --- a/target/hexagon/op_helper.h
> +++ b/target/hexagon/op_helper.h
> @@ -19,15 +19,8 @@
>  #define HEXAGON_OP_HELPER_H
> 
>  /* Misc functions */
> -uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1,
> -  uint32_t slot, target_ulong vaddr);
> -uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1,
> -   uint32_t slot, target_ulong vaddr);
> -uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1,
> -   uint32_t slot, target_ulong vaddr);
> -uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1,
> -   uint32_t slot, target_ulong vaddr);
> -
> +void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
> +  uint32_t slot, target_ulong vaddr, int size);
>  void log_store64(CPUHexagonState *env, target_ulong addr,
>   int64_t val, int width, int slot);
>  void log_store32(CPUHexagonState *env, target_ulong addr,
> diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
> index 12967ac21e..1bc9c7fc2e 100644
> --- a/target/hexagon/op_helper.c
> +++ b/target/hexagon/op_helper.c
> @@ -467,13 +467,12 @@ int32_t HELPER(cabacdecbin_pred)(int64_t RssV,
> int64_t RttV)
>  }
> 
>  static void probe_store(CPUHexagonState *env, int slot, int mmu_idx,
> -bool is_predicated)
> +bool is_predicated, uintptr_t retaddr)
>  {
>  if (!is_predicated || !(env->slot_cancelled & (1 << slot))) {
>  size1u_t width = env->mem_log_stores[slot].width;
>  target_ulong va = env->mem_log_stores[slot].va;
> -uintptr_t ra = GETPC();
> -probe_write

[PATCH] Hexagon: move GETPC() calls to top level helpers

2023-07-03 Thread Matheus Tavares Bernardino

As docs/devel/loads-stores.rst states:

  ``GETPC()`` should be used with great care: calling
  it in other functions that are *not* the top level
  ``HELPER(foo)`` will cause unexpected behavior. Instead, the
  value of ``GETPC()`` should be read from the helper and passed
  if needed to the functions that the helper calls.

Let's fix the GETPC() usage in Hexagon, making sure it's always called
from top level helpers and passed down to the places where it's
needed. There are two snippets where that is not currently the case:

- probe_store(), which is only called from two helpers, so it's easy to
  move GETPC() up.

- mem_load*() functions, which are also called directly from helpers,
  but through the MEM_LOAD*() set of macros. Note that these are only
  used when compiling with --disable-hexagon-idef-parser.

  In this case, we also take this opportunity to simplify the code,
  unifying the mem_load*() functions.

Signed-off-by: Matheus Tavares Bernardino 
---
 target/hexagon/macros.h| 22 ++---
 target/hexagon/op_helper.h | 11 ++---
 target/hexagon/op_helper.c | 49 +++---
 3 files changed, 25 insertions(+), 57 deletions(-)

diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index 5451b061ee..efb8013912 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -173,14 +173,20 @@
 #define MEM_STORE8(VA, DATA, SLOT) \
 MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
 #else
-#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD2s(VA) ((int16_t)mem_load2(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD2u(VA) ((uint16_t)mem_load2(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD4s(VA) ((int32_t)mem_load4(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD4u(VA) ((uint32_t)mem_load4(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD8s(VA) ((int64_t)mem_load8(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD8u(VA) ((uint64_t)mem_load8(env, pkt_has_store_s1, slot, VA))
+
+#define MEM_LOADn(SIZE, VA) ({ \
+check_noshuf(env, pkt_has_store_s1, slot, VA, SIZE); \
+cpu_ldub_data_ra(env, VA, GETPC()); \
+})
+
+#define MEM_LOAD1s(VA) ((int8_t)MEM_LOADn(1, VA))
+#define MEM_LOAD1u(VA) ((uint8_t)MEM_LOADn(1, VA))
+#define MEM_LOAD2s(VA) ((int16_t)MEM_LOADn(2, VA))
+#define MEM_LOAD2u(VA) ((uint16_t)MEM_LOADn(2, VA))
+#define MEM_LOAD4s(VA) ((int32_t)MEM_LOADn(4, VA))
+#define MEM_LOAD4u(VA) ((uint32_t)MEM_LOADn(4, VA))
+#define MEM_LOAD8s(VA) ((int64_t)MEM_LOADn(8, VA))
+#define MEM_LOAD8u(VA) ((uint64_t)MEM_LOADn(8, VA))
 
 #define MEM_STORE1(VA, DATA, SLOT) log_store32(env, VA, DATA, 1, SLOT)
 #define MEM_STORE2(VA, DATA, SLOT) log_store32(env, VA, DATA, 2, SLOT)
diff --git a/target/hexagon/op_helper.h b/target/hexagon/op_helper.h
index 8f3764d15e..845c3d197e 100644
--- a/target/hexagon/op_helper.h
+++ b/target/hexagon/op_helper.h
@@ -19,15 +19,8 @@
 #define HEXAGON_OP_HELPER_H
 
 /* Misc functions */
-uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1,
-  uint32_t slot, target_ulong vaddr);
-uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1,
-   uint32_t slot, target_ulong vaddr);
-uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1,
-   uint32_t slot, target_ulong vaddr);
-uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1,
-   uint32_t slot, target_ulong vaddr);
-
+void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
+  uint32_t slot, target_ulong vaddr, int size);
 void log_store64(CPUHexagonState *env, target_ulong addr,
  int64_t val, int width, int slot);
 void log_store32(CPUHexagonState *env, target_ulong addr,
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 12967ac21e..1bc9c7fc2e 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -467,13 +467,12 @@ int32_t HELPER(cabacdecbin_pred)(int64_t RssV, int64_t 
RttV)
 }
 
 static void probe_store(CPUHexagonState *env, int slot, int mmu_idx,
-bool is_predicated)
+bool is_predicated, uintptr_t retaddr)
 {
 if (!is_predicated || !(env->slot_cancelled & (1 << slot))) {
 size1u_t width = env->mem_log_stores[slot].width;
 target_ulong va = env->mem_log_stores[slot].va;
-uintptr_t ra = GETPC();
-probe_write(env, va, width, mmu_idx, ra);
+probe_write(env, va, width, mmu_idx, retaddr);
 }
 }
 
@@ -494,7 +493,8 @@ void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState 
*env, int args)
 int mmu_idx = FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX);
 bool is_predicated =
 FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED);
-probe_store(env, 0, mmu_idx, is_predicated);
+uintptr_t ra = GETPC();
+probe_store(env, 0, mmu_idx, is_predicated

Re: [PATCH] riscv: Generate devicetree only after machine initialization is complete

2023-07-03 Thread Guenter Roeck


On 7/3/23 12:25, Daniel Henrique Barboza wrote:

On 7/3/23 00:46, Guenter Roeck wrote:

If the devicetree is created before machine initialization is complete,
it misses dynamic devices. Specifically, the tpm device is not added
to the devicetree file and is therefore not instantiated in Linux.
Create devicetree in virt_machine_done() to solve the problem.

Cc: Alistair Francis 
Fixes: 325b7c4e75 hw/riscv: Enable TPM backends
Signed-off-by: Guenter Roeck 
---
  hw/riscv/virt.c | 9 ++---
  1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index ed4c27487e..08876284f5 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1248,6 +1248,11 @@ static void virt_machine_done(Notifier *notifier, void 
*data)
  uint64_t kernel_entry = 0;
  BlockBackend *pflash_blk0;
+    /* create devicetree if not provided */
+    if (!machine->dtb) {
+    create_fdt(s, memmap);
+    }
+


I suggest moving the entire load/create DT code from virt_machine_init() to
the start of virt_machine_done():

     /* load/create device tree */
     if (machine->dtb) {
     machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
     if (!machine->fdt) {
     error_report("load_device_tree() failed");
     exit(1);
     }
     } else {
     create_fdt(s, memmap);
     }

This way we don't have to look in to 2 different functions to wonder what 
happens
in case machine->dtb is NULL.



I can do that, but I don't know how to test it. Is there a working dtb/machine
combination for riscv which would let me test loading a devicetree file ?

Guenter

GLibC AMD CPUID cache reporting regression (was Re: qemu-user self emulation broken with default CPU on x86/x64)

2023-07-03 Thread Daniel P . Berrangé

On Mon, Jul 03, 2023 at 06:03:08PM +0200, Pierrick Bouvier wrote:
> Hi everyone,
> 
> Recently (in d135f781 [1], between v7.0.0 and v8.0.0), qemu-user default cpu
> was updated to "max" instead of qemu32/qemu64.
> 
> This change "broke" qemu self emulation if this new default cpu is used.
> 
> $ ./qemu-x86_64 ./qemu-x86_64 --version
> qemu-x86_64: ../util/cacheflush.c:212: init_cache_info: Assertion `(isize &
> (isize - 1)) == 0' failed.
> qemu: uncaught target signal 6 (Aborted) - core dumped
> Aborted
> 
> By setting cpu back to qemu64, it works again.
> $ ./qemu-x86_64 -cpu qemu64 ./qemu-x86_64  --version
> qemu-x86_64 version 8.0.50 (v8.0.0-2317-ge125b08ed6)
> Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project developers
> 
> Commenting assert does not work, as qemu aligned malloc fail shortly after.
> 
> I'm willing to fix it, but I'm not sure what is the issue with "max" cpu
> exactly. Is it missing CPU cache line, or something else?

I've observed GLibC is issuing CPUID leaf 0x8000_001d

QEMU 'max' CPU model doesn't define xlevel, so QEMU makes it default
to the same as min_xlevel, which is calculated to be 0x8000_000a.

cpu_x86_cpuid() in QEMU sees CPUID leaf 0x8000_001d is above 0x8000_000a,
and so considers it an invalid CPUID and thus forces it to report
0x0000_000d which is supposedly what an invalid CPUID leaf should do.

Net result: glibc is asking for 0x8000_001d, but getting back data
for 0x0000_000d.

This doesn't end happily for obvious reasons, getting garbage for
the dcache sizes.

The 'qemu64' CPU model also gets CPUID leaf 0x8000_001d capped back
to 0x0000_000d, but crucially qemu64 lacks the 'xsave' feature bit,
so QEMU returns all-zeroes for CPUID leaf 0x0000_000d. Still not
good, but this makes glibc report 0 for DCACHE_*, which in turn
avoids tripping up the nested qemu which queries DCACHE sysconf.

So the problem is thus more widespread than just 'max' CPU model.

Any QEMU CPU model with vendor=AuthenticAMD and the xsave feature,
and the xlevel unset, will cause glibc to report garbage for the
L1D cache info

Any QEMU CPU model with vendor=AuthenticAMD and without the xsave
feature, and the xlevel unset, will cause glibc to report zeroes
for L1D cache info

Neither is good, but the latter at least doesn't trip up the
nested QEMU when it queries L1D cache info.

I'm unsure if QEMU's behaviour is correct with calculating the
default 'xlevel' values for 'max', but I'm assuming the xlevel
was correct for Opteron_G4/5 since those are explicitly set
in the code for a long time.

Over to the GLibC side, I see there was a recent change:

commit 103a469dc7755fd9e8ccf362f3dd4c55dc761908
Author: Sajan Karumanchi 
Date:   Wed Jan 18 18:29:04 2023 +0100

x86: Cache computation for AMD architecture.

All AMD architectures cache details will be computed based on
__cpuid__ `0x8000_001D` and the reference to __cpuid__ `0x8000_0006` will be
zeroed out for future architectures.

Reviewed-by: Premachandra Mallappa 

This introduced the use of CPUID leaf 0x8000_001D. Before this point
glibc would use 0x8000_0000 and 0x8000_0005 to calculate the cache
size.  QEMU worked correctly with this implementation.

  https://sourceware.org/pipermail/libc-alpha/2023-January/144815.html

The reporter said

   "Though we have done the testing on Zen and pre-Zen architectures,
we recommend to carryout the tests from your end too."

it is unclear if their testing would have covered Opteron_G4/Opteron_G5
architectures, and I'm not expecting them to have done QEMU testing, of course.

I don't have any non-virtual pre-Zen silicon I could verify CPUID
behaviour on. I've not found historic versions of the AMD architecture
reference to see when they first documented 0x8000_001d as a valid
CPUID leaf for getting cache info.

IOW it is still unclear to me whether the root cause bug here is in
QEMU's emulation of CPUID 0x8000_001d, or whether this was actually
a real regression introduced in glibc >= 2.37

I'm tending towards glibc regression though.

Copying Florian and the original AMD patch author

Brief summary

With old glibc 2.36, using QEMU's  qemu64/max CPU models:

# qemu-x86_64-static -cpu qemu64 /bin/getconf -a | grep DCACHE
LEVEL1_DCACHE_SIZE 65536
LEVEL1_DCACHE_ASSOC2
LEVEL1_DCACHE_LINESIZE 64

# qemu-x86_64-static -cpu Opteron_G4 /bin/getconf -a | grep DCACHE
LEVEL1_DCACHE_SIZE 65536
LEVEL1_DCACHE_ASSOC2
LEVEL1_DCACHE_LINESIZE 64

# qemu-x86_64-static -cpu max /bin/getconf -a | grep DCACHE
LEVEL1_DCACHE_SIZE 65536
LEVEL1_DCACHE_ASSOC2
LEVEL1_DCACHE_LINESIZE 64

With new glibc 2.37:

# qemu-x86_64-static -cpu qemu64 /bin/getconf -a | grep DCACHE
LEVEL1_DCACHE_SIZE 0
LEVEL1_DCACHE_ASSOC0
LEVEL1_DCACHE_LINESIZE 0

# qemu-x86_64-static -cpu Opteron_G4 /bin/getconf -a | grep DCACHE
LEVEL1_DCACHE_SIZE

Re: [PATCH v2] hw/ide/piix: properly initialize the BMIBA register

2023-07-03 Thread Bernhard Beschow




Am 3. Juli 2023 07:59:29 UTC schrieb Olaf Hering :
>Sun, 02 Jul 2023 22:18:50 + Bernhard Beschow :
>
>> Do you know if that command calls pci_device_reset() (which would eventually 
>> call piix_ide_reset())?
>
>The function is pci_xen_ide_unplug, which calls device_cold_reset.

I think this explains why the BAR isn't reset: Unlike pci_device_reset(), 
device_cold_reset() lacks calling pci_do_device_reset() which would take care 
of the BARs.

Paolo, Peter: Should we switch to pci_device_reset() in pci_xen_ide_unplug()? 
Or is device_cold_reset() supposed to do everything?

Best regards,
Bernhard

>
>
>Olaf

Re: [PATCH v2 1/2] linux-headers: Update with vfio_ap IRQ index mapping

2023-07-03 Thread Anthony Krowiak





On 7/3/23 12:31 PM, Cédric Le Goater wrote:

On 6/2/23 16:11, Tony Krowiak wrote:

Note: This is a placeholder patch that includes unmerged uapi changes.

Signed-off-by: Tony Krowiak 
Link: 
https://lore.kernel.org/qemu-devel/20230530225544.280031-1-akrow...@linux.ibm.com/


I am preparing a vfio-next tree including these changes plus a 
linux-headers

update. I am just waiting for the 6.5-rc1 tag to be pushed.


Good news, thanks.



Thanks,

C.


---
  include/standard-headers/linux/const.h    |  2 +-
  include/standard-headers/linux/virtio_blk.h   | 18 +++
  .../standard-headers/linux/virtio_config.h    |  6 +++
  include/standard-headers/linux/virtio_net.h   |  1 +
  linux-headers/asm-arm64/kvm.h | 33 
  linux-headers/asm-riscv/kvm.h | 53 ++-
  linux-headers/asm-riscv/unistd.h  |  9 
  linux-headers/asm-s390/unistd_32.h    |  1 +
  linux-headers/asm-s390/unistd_64.h    |  1 +
  linux-headers/asm-x86/kvm.h   |  3 ++
  linux-headers/linux/const.h   |  2 +-
  linux-headers/linux/kvm.h | 12 +++--
  linux-headers/linux/psp-sev.h |  7 +++
  linux-headers/linux/userfaultfd.h | 17 +-
  linux-headers/linux/vfio.h    |  9 
  15 files changed, 158 insertions(+), 16 deletions(-)

diff --git a/include/standard-headers/linux/const.h 
b/include/standard-headers/linux/const.h

index 5e4898725168..1eb84b5087f8 100644
--- a/include/standard-headers/linux/const.h
+++ b/include/standard-headers/linux/const.h
@@ -28,7 +28,7 @@
  #define _BITUL(x)    (_UL(1) << (x))
  #define _BITULL(x)    (_ULL(1) << (x))
-#define __ALIGN_KERNEL(x, a)    __ALIGN_KERNEL_MASK(x, 
(typeof(x))(a) - 1)
+#define __ALIGN_KERNEL(x, a)    __ALIGN_KERNEL_MASK(x, 
(__typeof__(x))(a) - 1)

  #define __ALIGN_KERNEL_MASK(x, mask)    (((x) + (mask)) & ~(mask))
  #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
diff --git a/include/standard-headers/linux/virtio_blk.h 
b/include/standard-headers/linux/virtio_blk.h

index 7155b1a4701b..d7be3cf5e42f 100644
--- a/include/standard-headers/linux/virtio_blk.h
+++ b/include/standard-headers/linux/virtio_blk.h
@@ -138,11 +138,11 @@ struct virtio_blk_config {
  /* Zoned block device characteristics (if VIRTIO_BLK_F_ZONED) */
  struct virtio_blk_zoned_characteristics {
-    uint32_t zone_sectors;
-    uint32_t max_open_zones;
-    uint32_t max_active_zones;
-    uint32_t max_append_sectors;
-    uint32_t write_granularity;
+    __virtio32 zone_sectors;
+    __virtio32 max_open_zones;
+    __virtio32 max_active_zones;
+    __virtio32 max_append_sectors;
+    __virtio32 write_granularity;
  uint8_t model;
  uint8_t unused2[3];
  } zoned;
@@ -239,11 +239,11 @@ struct virtio_blk_outhdr {
   */
  struct virtio_blk_zone_descriptor {
  /* Zone capacity */
-    uint64_t z_cap;
+    __virtio64 z_cap;
  /* The starting sector of the zone */
-    uint64_t z_start;
+    __virtio64 z_start;
  /* Zone write pointer position in sectors */
-    uint64_t z_wp;
+    __virtio64 z_wp;
  /* Zone type */
  uint8_t z_type;
  /* Zone state */
@@ -252,7 +252,7 @@ struct virtio_blk_zone_descriptor {
  };
  struct virtio_blk_zone_report {
-    uint64_t nr_zones;
+    __virtio64 nr_zones;
  uint8_t reserved[56];
  struct virtio_blk_zone_descriptor zones[];
  };
diff --git a/include/standard-headers/linux/virtio_config.h 
b/include/standard-headers/linux/virtio_config.h

index 965ee6ae237e..8a7d0dc8b007 100644
--- a/include/standard-headers/linux/virtio_config.h
+++ b/include/standard-headers/linux/virtio_config.h
@@ -97,6 +97,12 @@
   */
  #define VIRTIO_F_SR_IOV    37
+/*
+ * This feature indicates that the driver passes extra data (besides
+ * identifying the virtqueue) in its device notifications.
+ */
+#define VIRTIO_F_NOTIFICATION_DATA    38
+
  /*
   * This feature indicates that the driver can reset a queue 
individually.

   */
diff --git a/include/standard-headers/linux/virtio_net.h 
b/include/standard-headers/linux/virtio_net.h

index c0e797067aae..2325485f2ca8 100644
--- a/include/standard-headers/linux/virtio_net.h
+++ b/include/standard-headers/linux/virtio_net.h
@@ -61,6 +61,7 @@
  #define VIRTIO_NET_F_GUEST_USO6    55    /* Guest can handle USOv6 
in. */

  #define VIRTIO_NET_F_HOST_USO    56    /* Host can handle USO in. */
  #define VIRTIO_NET_F_HASH_REPORT  57    /* Supports hash report */
+#define VIRTIO_NET_F_GUEST_HDRLEN  59    /* Guest provides the exact 
hdr_len value. */

  #define VIRTIO_NET_F_RSS  60    /* Supports RSS RX steering */
  #define VIRTIO_NET_F_RSC_EXT  61    /* extended coalescing info */
  #define VIRTIO_NET_F_STANDBY  62    /* Act as standby for 
another device
diff --git a/linux-headers/asm-arm64/kvm.h 
b/linux-headers/asm-arm64/kvm.h

index d7e7bb885e20..38e595

Re: [PATCH] riscv: Generate devicetree only after machine initialization is complete

2023-07-03 Thread Daniel Henrique Barboza


On 7/3/23 00:46, Guenter Roeck wrote:

If the devicetree is created before machine initialization is complete,
it misses dynamic devices. Specifically, the tpm device is not added
to the devicetree file and is therefore not instantiated in Linux.
Create devicetree in virt_machine_done() to solve the problem.

Cc: Alistair Francis 
Fixes: 325b7c4e75 hw/riscv: Enable TPM backends
Signed-off-by: Guenter Roeck 
---
  hw/riscv/virt.c | 9 ++---
  1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index ed4c27487e..08876284f5 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1248,6 +1248,11 @@ static void virt_machine_done(Notifier *notifier, void 
*data)
  uint64_t kernel_entry = 0;
  BlockBackend *pflash_blk0;
  
+/* create devicetree if not provided */

+if (!machine->dtb) {
+create_fdt(s, memmap);
+}
+


I suggest moving the entire load/create DT code from virt_machine_init() to
the start of virt_machine_done():

/* load/create device tree */
if (machine->dtb) {
machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
if (!machine->fdt) {
error_report("load_device_tree() failed");
exit(1);
}
} else {
create_fdt(s, memmap);
}

This way we don't have to look into two different functions to work out
what happens when machine->dtb is NULL.


Thanks,


Daniel



  /*
   * Only direct boot kernel is currently supported for KVM VM,
   * so the "-bios" parameter is not supported when KVM is enabled.
@@ -1508,15 +1513,13 @@ static void virt_machine_init(MachineState *machine)
  }
  virt_flash_map(s, system_memory);
  
-/* load/create device tree */

+/* load device tree */
  if (machine->dtb) {
  machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
  if (!machine->fdt) {
  error_report("load_device_tree() failed");
  exit(1);
  }
-} else {
-create_fdt(s, memmap);
  }
  
  s->machine_done.notify = virt_machine_done;

Re: [PATCH] riscv: Generate devicetree only after machine initialization is complete

2023-07-03 Thread Daniel Henrique Barboza





On 7/3/23 04:46, Philippe Mathieu-Daudé wrote:

On 3/7/23 05:46, Guenter Roeck wrote:

If the devicetree is created before machine initialization is complete,
it misses dynamic devices. Specifically, the tpm device is not added
to the devicetree file and is therefore not instantiated in Linux.
Create devicetree in virt_machine_done() to solve the problem.


This makes sense, but what about the other archs/machines?
Shouldn't we fix this generically?


As far as other archs go, I can say that ARM isn't affected by it because
the fdt creation is done by arm_load_dtb() during virt_machine_done time.
I'm not aware of how x86 handles TPM. And pseries/ppc64 does a completely
different thing (per usual).

Inside hw/riscv the only TPM capable board is 'virt'. So I think this patch
has an adequate scope.


Thanks,


Daniel





Cc: Alistair Francis 
Fixes: 325b7c4e75 hw/riscv: Enable TPM backends
Signed-off-by: Guenter Roeck 
---
  hw/riscv/virt.c | 9 ++---
  1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index ed4c27487e..08876284f5 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1248,6 +1248,11 @@ static void virt_machine_done(Notifier *notifier, void 
*data)
  uint64_t kernel_entry = 0;
  BlockBackend *pflash_blk0;
+    /* create devicetree if not provided */
+    if (!machine->dtb) {
+    create_fdt(s, memmap);
+    }
+
  /*
   * Only direct boot kernel is currently supported for KVM VM,
   * so the "-bios" parameter is not supported when KVM is enabled.
@@ -1508,15 +1513,13 @@ static void virt_machine_init(MachineState *machine)
  }
  virt_flash_map(s, system_memory);
-    /* load/create device tree */
+    /* load device tree */
  if (machine->dtb) {
  machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
  if (!machine->fdt) {
  error_report("load_device_tree() failed");
  exit(1);
  }
-    } else {
-    create_fdt(s, memmap);
  }
  s->machine_done.notify = virt_machine_done;

[PATCH v2 14/16] target/riscv: Move sysemu-specific code to sysemu/cpu_helper.c

2023-07-03 Thread Philippe Mathieu-Daudé

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/riscv/cpu_helper.c| 858 +-
 target/riscv/sysemu/cpu_helper.c | 863 +++
 target/riscv/sysemu/meson.build  |   1 +
 3 files changed, 865 insertions(+), 857 deletions(-)
 create mode 100644 target/riscv/sysemu/cpu_helper.c

diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index f1d0cd1e64..900e3c2b5c 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -18,22 +18,12 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/log.h"
-#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "internals.h"
-#include "sysemu/pmu.h"
-#include "exec/exec-all.h"
-#include "sysemu/instmap.h"
-#include "tcg/tcg-op.h"
-#include "trace.h"
-#include "semihosting/common-semi.h"
 #ifndef CONFIG_USER_ONLY
 #include "sysemu/cpu-timers.h"
 #endif
-#include "cpu_bits.h"
-#include "sysemu/debug.h"
-#include "tcg/oversized-guest.h"
+
 
 int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
 {
@@ -108,849 +98,3 @@ void riscv_cpu_update_mask(CPURISCVState *env)
 env->cur_pmbase = base;
 }
 }
-
-#ifndef CONFIG_USER_ONLY
-
-/*
- * The HS-mode is allowed to configure priority only for the
- * following VS-mode local interrupts:
- *
- * 0  (Reserved interrupt, reads as zero)
- * 1  Supervisor software interrupt
- * 4  (Reserved interrupt, reads as zero)
- * 5  Supervisor timer interrupt
- * 8  (Reserved interrupt, reads as zero)
- * 13 (Reserved interrupt)
- * 14 "
- * 15 "
- * 16 "
- * 17 "
- * 18 "
- * 19 "
- * 20 "
- * 21 "
- * 22 "
- * 23 "
- */
-
-static const int hviprio_index2irq[] = {
-0, 1, 4, 5, 8, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 };
-static const int hviprio_index2rdzero[] = {
-1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-
-int riscv_cpu_hviprio_index2irq(int index, int *out_irq, int *out_rdzero)
-{
-if (index < 0 || ARRAY_SIZE(hviprio_index2irq) <= index) {
-return -EINVAL;
-}
-
-if (out_irq) {
-*out_irq = hviprio_index2irq[index];
-}
-
-if (out_rdzero) {
-*out_rdzero = hviprio_index2rdzero[index];
-}
-
-return 0;
-}
-
-/*
- * Default priorities of local interrupts are defined in the
- * RISC-V Advanced Interrupt Architecture specification.
- *
- * 
- *  Default  |
- *  Priority | Major Interrupt Numbers
- * 
- *  Highest  | 47, 23, 46, 45, 22, 44,
- *   | 43, 21, 42, 41, 20, 40
- *   |
- *   | 11 (0b),  3 (03),  7 (07)
- *   |  9 (09),  1 (01),  5 (05)
- *   | 12 (0c)
- *   | 10 (0a),  2 (02),  6 (06)
- *   |
- *   | 39, 19, 38, 37, 18, 36,
- *  Lowest   | 35, 17, 34, 33, 16, 32
- * 
- */
-static const uint8_t default_iprio[64] = {
-/* Custom interrupts 48 to 63 */
-[63] = IPRIO_MMAXIPRIO,
-[62] = IPRIO_MMAXIPRIO,
-[61] = IPRIO_MMAXIPRIO,
-[60] = IPRIO_MMAXIPRIO,
-[59] = IPRIO_MMAXIPRIO,
-[58] = IPRIO_MMAXIPRIO,
-[57] = IPRIO_MMAXIPRIO,
-[56] = IPRIO_MMAXIPRIO,
-[55] = IPRIO_MMAXIPRIO,
-[54] = IPRIO_MMAXIPRIO,
-[53] = IPRIO_MMAXIPRIO,
-[52] = IPRIO_MMAXIPRIO,
-[51] = IPRIO_MMAXIPRIO,
-[50] = IPRIO_MMAXIPRIO,
-[49] = IPRIO_MMAXIPRIO,
-[48] = IPRIO_MMAXIPRIO,
-
-/* Custom interrupts 24 to 31 */
-[31] = IPRIO_MMAXIPRIO,
-[30] = IPRIO_MMAXIPRIO,
-[29] = IPRIO_MMAXIPRIO,
-[28] = IPRIO_MMAXIPRIO,
-[27] = IPRIO_MMAXIPRIO,
-[26] = IPRIO_MMAXIPRIO,
-[25] = IPRIO_MMAXIPRIO,
-[24] = IPRIO_MMAXIPRIO,
-
-[47] = IPRIO_DEFAULT_UPPER,
-[23] = IPRIO_DEFAULT_UPPER + 1,
-[46] = IPRIO_DEFAULT_UPPER + 2,
-[45] = IPRIO_DEFAULT_UPPER + 3,
-[22] = IPRIO_DEFAULT_UPPER + 4,
-[44] = IPRIO_DEFAULT_UPPER + 5,
-
-[43] = IPRIO_DEFAULT_UPPER + 6,
-[21] = IPRIO_DEFAULT_UPPER + 7,
-[42] = IPRIO_DEFAULT_UPPER + 8,
-[41] = IPRIO_DEFAULT_UPPER + 9,
-[20] = IPRIO_DEFAULT_UPPER + 10,
-[40] = IPRIO_DEFAULT_UPPER + 11,
-
-[11] = IPRIO_DEFAULT_M,
-[3]  = IPRIO_DEFAULT_M + 1,
-[7]  = IPRIO_DEFAULT_M + 2,
-
-[9]  = IPRIO_DEFAULT_S,
-[1]  = IPRIO_DEFAULT_S + 1,
-[5]  = IPRIO_DEFAULT_S + 2,
-
-[12] = IPRIO_DEFAULT_SGEXT,
-
-[10] = IPRIO_DEFAULT_VS,
-[2]  = IPRIO_DEFAULT_VS + 1,
-[6]  = IPRIO_DEFAULT_VS + 2,
-
-[39] = IPRIO_DEFAULT_LOWER,
-[19] = IPRIO_DEFAULT_LOWER + 1,
-[38] = IPRIO_DEFAULT_LOWER + 2,
-[37] = IPRIO_DEFAULT_LOWER + 3,
-[18] = IPRIO_DEFAULT_LOWER + 4,
-[36] = IPRIO_DEFAULT_LOWER + 5,
-
-[35] = IPRIO_DEFAULT_LOWER + 6,
-[17] = IPRIO_DEFAULT_LOWER + 7,
-[34] = IPRIO_DEFAULT_LOWER + 8,
-[33] = IPRIO_DEFAULT_LOWER + 9,
-[16] = IPRIO_DEFAULT_LOWER + 10,
-[32] = IPRIO_DEFAULT_LOWER + 11,
-};
-
-uint8_t riscv_cpu_default_priority(int irq)
-{

[PATCH v2 12/16] target/riscv: Expose riscv_cpu_pending_to_irq() from cpu_helper.c

2023-07-03 Thread Philippe Mathieu-Daudé

We want to extract TCG/sysemu-specific code from cpu_helper.c,
but some functions call riscv_cpu_pending_to_irq(). Expose the
prototype in "internals.h".

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/riscv/internals.h  | 4 
 target/riscv/cpu_helper.c | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index b5f823c7ec..b6881b4815 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -72,6 +72,10 @@ target_ulong fclass_d(uint64_t frs1);
 
 #ifndef CONFIG_USER_ONLY
 extern const VMStateDescription vmstate_riscv_cpu;
+
+int riscv_cpu_pending_to_irq(CPURISCVState *env,
+ int extirq, unsigned int extirq_def_prio,
+ uint64_t pending, uint8_t *iprio);
 #endif
 
 enum {
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 6c773000a5..e73cf56e5c 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -256,9 +256,9 @@ uint8_t riscv_cpu_default_priority(int irq)
 return default_iprio[irq] ? default_iprio[irq] : IPRIO_MMAXIPRIO;
 };
 
-static int riscv_cpu_pending_to_irq(CPURISCVState *env,
-int extirq, unsigned int extirq_def_prio,
-uint64_t pending, uint8_t *iprio)
+int riscv_cpu_pending_to_irq(CPURISCVState *env,
+ int extirq, unsigned int extirq_def_prio,
+ uint64_t pending, uint8_t *iprio)
 {
 int irq, best_irq = RISCV_EXCP_NONE;
 unsigned int prio, best_prio = UINT_MAX;
-- 
2.38.1

[PATCH v2 15/16] target/riscv: Restrict TCG-specific prototype declarations

2023-07-03 Thread Philippe Mathieu-Daudé

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/riscv/cpu.h |  3 +++
 target/riscv/cpu.c | 11 +++
 2 files changed, 14 insertions(+)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 42bd7efe4c..ab1968deb7 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -474,7 +474,10 @@ RISCVException smstateen_acc_ok(CPURISCVState *env, int 
index, uint64_t bit);
 
 void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv);
 
+#ifdef CONFIG_TCG
 void riscv_translate_init(void);
+#endif
+
 G_NORETURN void riscv_raise_exception(CPURISCVState *env,
   uint32_t exception, uintptr_t pc);
 
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index cd01af3595..31ca1a4ff9 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -37,7 +37,9 @@
 #include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
 #include "fpu/softfloat-helpers.h"
+#ifdef CONFIG_TCG
 #include "tcg/tcg.h"
+#endif
 
 /* RISC-V CPU definitions */
 
@@ -785,6 +787,7 @@ static vaddr riscv_cpu_get_pc(CPUState *cs)
 return env->pc;
 }
 
+#ifdef CONFIG_TCG
 static void riscv_cpu_synchronize_from_tb(CPUState *cs,
   const TranslationBlock *tb)
 {
@@ -802,6 +805,7 @@ static void riscv_cpu_synchronize_from_tb(CPUState *cs,
 }
 }
 }
+#endif
 
 static bool riscv_cpu_has_work(CPUState *cs)
 {
@@ -818,6 +822,7 @@ static bool riscv_cpu_has_work(CPUState *cs)
 #endif
 }
 
+#ifdef CONFIG_TCG
 static void riscv_restore_state_to_opc(CPUState *cs,
const TranslationBlock *tb,
const uint64_t *data)
@@ -840,6 +845,7 @@ static void riscv_restore_state_to_opc(CPUState *cs,
 }
 env->bins = data[1];
 }
+#endif
 
 static void riscv_cpu_reset_hold(Object *obj)
 {
@@ -1871,6 +1877,8 @@ static const struct SysemuCPUOps riscv_sysemu_ops = {
 };
 #endif
 
+#ifdef CONFIG_TCG
+
 #include "hw/core/tcg-cpu-ops.h"
 
 static const struct TCGCPUOps riscv_tcg_ops = {
@@ -1889,6 +1897,7 @@ static const struct TCGCPUOps riscv_tcg_ops = {
 .debug_check_watchpoint = riscv_cpu_debug_check_watchpoint,
 #endif /* !CONFIG_USER_ONLY */
 };
+#endif /* CONFIG_TCG */
 
 static void riscv_cpu_class_init(ObjectClass *c, void *data)
 {
@@ -1919,7 +1928,9 @@ static void riscv_cpu_class_init(ObjectClass *c, void 
*data)
 #endif
 cc->gdb_arch_name = riscv_gdb_arch_name;
 cc->gdb_get_dynamic_xml = riscv_gdb_get_dynamic_xml;
+#ifdef CONFIG_TCG
 cc->tcg_ops = &riscv_tcg_ops;
+#endif /* CONFIG_TCG */
 
 device_class_set_props(dc, riscv_cpu_properties);
 }
-- 
2.38.1

[PATCH v2 03/16] target/riscv: Restrict sysemu specific header to user emulation

2023-07-03 Thread Philippe Mathieu-Daudé

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Daniel Henrique Barboza 
Acked-by: Alistair Francis 
---
 target/riscv/cpu.c| 8 +---
 target/riscv/cpu_helper.c | 2 ++
 target/riscv/csr.c| 2 ++
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index fd647534cf..174003348f 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -23,9 +23,13 @@
 #include "qemu/log.h"
 #include "cpu.h"
 #include "cpu_vendorid.h"
+#ifndef CONFIG_USER_ONLY
 #include "pmu.h"
-#include "internals.h"
 #include "time_helper.h"
+#include "sysemu/kvm.h"
+#include "kvm_riscv.h"
+#endif
+#include "internals.h"
 #include "exec/exec-all.h"
 #include "qapi/error.h"
 #include "qapi/visitor.h"
@@ -33,8 +37,6 @@
 #include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
 #include "fpu/softfloat-helpers.h"
-#include "sysemu/kvm.h"
-#include "kvm_riscv.h"
 #include "tcg/tcg.h"
 
 /* RISC-V CPU definitions */
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 9f611d89bb..e8b7f70be3 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -28,7 +28,9 @@
 #include "tcg/tcg-op.h"
 #include "trace.h"
 #include "semihosting/common-semi.h"
+#ifndef CONFIG_USER_ONLY
 #include "sysemu/cpu-timers.h"
+#endif
 #include "cpu_bits.h"
 #include "debug.h"
 #include "tcg/oversized-guest.h"
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index ea7585329e..e5737dcf58 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -21,8 +21,10 @@
 #include "qemu/log.h"
 #include "qemu/timer.h"
 #include "cpu.h"
+#ifndef CONFIG_USER_ONLY
 #include "pmu.h"
 #include "time_helper.h"
+#endif
 #include "qemu/main-loop.h"
 #include "exec/exec-all.h"
 #include "exec/tb-flush.h"
-- 
2.38.1

[PATCH v2 04/16] target/riscv: Restrict 'rv128' machine to TCG accelerator

2023-07-03 Thread Philippe Mathieu-Daudé

We only build for 32/64-bit hosts, so TCG is required for
128-bit targets.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Daniel Henrique Barboza 
Acked-by: Alistair Francis 
---
 target/riscv/cpu.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 174003348f..78ab61c274 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -498,6 +498,7 @@ static void rv64_veyron_v1_cpu_init(Object *obj)
 #endif
 }
 
+#ifdef CONFIG_TCG
 static void rv128_base_cpu_init(Object *obj)
 {
 if (qemu_tcg_mttcg_enabled()) {
@@ -516,7 +517,10 @@ static void rv128_base_cpu_init(Object *obj)
 set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV57);
 #endif
 }
-#else
+#endif
+
+#else /* !TARGET_RISCV64 */
+
 static void rv32_base_cpu_init(Object *obj)
 {
 CPURISCVState *env = &RISCV_CPU(obj)->env;
@@ -598,7 +602,7 @@ static void rv32_imafcu_nommu_cpu_init(Object *obj)
 cpu->cfg.ext_icsr = true;
 cpu->cfg.pmp = true;
 }
-#endif
+#endif /* !TARGET_RISCV64 */
 
 #if defined(CONFIG_KVM)
 static void riscv_host_cpu_init(Object *obj)
@@ -2033,8 +2037,10 @@ static const TypeInfo riscv_cpu_type_infos[] = {
 DEFINE_CPU(TYPE_RISCV_CPU_SHAKTI_C, rv64_sifive_u_cpu_init),
 DEFINE_CPU(TYPE_RISCV_CPU_THEAD_C906,   rv64_thead_c906_cpu_init),
 DEFINE_CPU(TYPE_RISCV_CPU_VEYRON_V1,rv64_veyron_v1_cpu_init),
+#ifdef CONFIG_TCG
 DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE128,  rv128_base_cpu_init),
-#endif
+#endif /* CONFIG_TCG */
+#endif /* TARGET_RISCV64 */
 };
 
 DEFINE_TYPES(riscv_cpu_type_infos)
-- 
2.38.1

[PATCH v2 01/16] target/riscv: Remove unuseful KVM stubs

2023-07-03 Thread Philippe Mathieu-Daudé

Since we always check whether KVM is enabled before calling
kvm_riscv_reset_vcpu() and kvm_riscv_set_irq(), their call
is elided by the compiler when KVM is not available.
Therefore the stubs are not even linked. Remove them.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Daniel Henrique Barboza 
Tested-by: Daniel Henrique Barboza 
Reviewed-by: Alistair Francis 
---
 target/riscv/kvm-stub.c  | 30 --
 target/riscv/kvm.c   |  4 +---
 target/riscv/meson.build |  2 +-
 3 files changed, 2 insertions(+), 34 deletions(-)
 delete mode 100644 target/riscv/kvm-stub.c

diff --git a/target/riscv/kvm-stub.c b/target/riscv/kvm-stub.c
deleted file mode 100644
index 4e8fc31a21..00
--- a/target/riscv/kvm-stub.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * QEMU KVM RISC-V specific function stubs
- *
- * Copyright (c) 2020 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2 or later, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see .
- */
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "kvm_riscv.h"
-
-void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
-{
-abort();
-}
-
-void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
-{
-abort();
-}
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 0f932a5b96..52884bbe15 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -503,9 +503,7 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
 {
 CPURISCVState *env = &cpu->env;
 
-if (!kvm_enabled()) {
-return;
-}
+assert(kvm_enabled());
 env->pc = cpu->env.kernel_addr;
 env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
 env->gpr[11] = cpu->env.fdt_addr;  /* a1 */
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index 7f56c5f88d..e3ab3df4e5 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -22,7 +22,7 @@ riscv_ss.add(files(
   'crypto_helper.c',
   'zce_helper.c'
 ))
-riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'), if_false: 
files('kvm-stub.c'))
+riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'))
 
 riscv_system_ss = ss.source_set()
 riscv_system_ss.add(files(
-- 
2.38.1

[PATCH v2 07/16] target/riscv: Move TCG-specific files to target/riscv/tcg/

2023-07-03 Thread Philippe Mathieu-Daudé

Move TCG-specific files to a new 'tcg' sub-directory. Add
stubs for riscv_cpu_[get/set]_fflags and riscv_raise_exception().
Adapt meson rules.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/riscv/{ => tcg}/XVentanaCondOps.decode |  0
 target/riscv/{ => tcg}/insn16.decode  |  0
 target/riscv/{ => tcg}/insn32.decode  |  0
 target/riscv/{ => tcg}/xthead.decode  |  0
 target/riscv/{ => tcg}/bitmanip_helper.c  |  0
 target/riscv/{ => tcg}/crypto_helper.c|  0
 target/riscv/{ => tcg}/fpu_helper.c   |  0
 target/riscv/{ => tcg}/m128_helper.c  |  0
 target/riscv/{ => tcg}/op_helper.c|  0
 target/riscv/tcg/tcg-stub.c   | 25 +++
 target/riscv/{ => tcg}/translate.c|  0
 target/riscv/{ => tcg}/vector_helper.c|  0
 target/riscv/{ => tcg}/zce_helper.c   |  0
 target/riscv/meson.build  | 18 +
 target/riscv/tcg/meson.build  | 19 ++
 15 files changed, 45 insertions(+), 17 deletions(-)
 rename target/riscv/{ => tcg}/XVentanaCondOps.decode (100%)
 rename target/riscv/{ => tcg}/insn16.decode (100%)
 rename target/riscv/{ => tcg}/insn32.decode (100%)
 rename target/riscv/{ => tcg}/xthead.decode (100%)
 rename target/riscv/{ => tcg}/bitmanip_helper.c (100%)
 rename target/riscv/{ => tcg}/crypto_helper.c (100%)
 rename target/riscv/{ => tcg}/fpu_helper.c (100%)
 rename target/riscv/{ => tcg}/m128_helper.c (100%)
 rename target/riscv/{ => tcg}/op_helper.c (100%)
 create mode 100644 target/riscv/tcg/tcg-stub.c
 rename target/riscv/{ => tcg}/translate.c (100%)
 rename target/riscv/{ => tcg}/vector_helper.c (100%)
 rename target/riscv/{ => tcg}/zce_helper.c (100%)
 create mode 100644 target/riscv/tcg/meson.build

diff --git a/target/riscv/XVentanaCondOps.decode 
b/target/riscv/tcg/XVentanaCondOps.decode
similarity index 100%
rename from target/riscv/XVentanaCondOps.decode
rename to target/riscv/tcg/XVentanaCondOps.decode
diff --git a/target/riscv/insn16.decode b/target/riscv/tcg/insn16.decode
similarity index 100%
rename from target/riscv/insn16.decode
rename to target/riscv/tcg/insn16.decode
diff --git a/target/riscv/insn32.decode b/target/riscv/tcg/insn32.decode
similarity index 100%
rename from target/riscv/insn32.decode
rename to target/riscv/tcg/insn32.decode
diff --git a/target/riscv/xthead.decode b/target/riscv/tcg/xthead.decode
similarity index 100%
rename from target/riscv/xthead.decode
rename to target/riscv/tcg/xthead.decode
diff --git a/target/riscv/bitmanip_helper.c b/target/riscv/tcg/bitmanip_helper.c
similarity index 100%
rename from target/riscv/bitmanip_helper.c
rename to target/riscv/tcg/bitmanip_helper.c
diff --git a/target/riscv/crypto_helper.c b/target/riscv/tcg/crypto_helper.c
similarity index 100%
rename from target/riscv/crypto_helper.c
rename to target/riscv/tcg/crypto_helper.c
diff --git a/target/riscv/fpu_helper.c b/target/riscv/tcg/fpu_helper.c
similarity index 100%
rename from target/riscv/fpu_helper.c
rename to target/riscv/tcg/fpu_helper.c
diff --git a/target/riscv/m128_helper.c b/target/riscv/tcg/m128_helper.c
similarity index 100%
rename from target/riscv/m128_helper.c
rename to target/riscv/tcg/m128_helper.c
diff --git a/target/riscv/op_helper.c b/target/riscv/tcg/op_helper.c
similarity index 100%
rename from target/riscv/op_helper.c
rename to target/riscv/tcg/op_helper.c
diff --git a/target/riscv/tcg/tcg-stub.c b/target/riscv/tcg/tcg-stub.c
new file mode 100644
index 00..dfe42ae2ac
--- /dev/null
+++ b/target/riscv/tcg/tcg-stub.c
@@ -0,0 +1,25 @@
+/*
+ * QEMU RISC-V TCG stubs
+ *
+ * Copyright (c) 2023 Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+
+target_ulong riscv_cpu_get_fflags(CPURISCVState *env)
+{
+g_assert_not_reached();
+}
+
+void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong)
+{
+g_assert_not_reached();
+}
+
+G_NORETURN void riscv_raise_exception(CPURISCVState *env,
+  uint32_t exception, uintptr_t pc)
+{
+g_assert_not_reached();
+}
diff --git a/target/riscv/translate.c b/target/riscv/tcg/translate.c
similarity index 100%
rename from target/riscv/translate.c
rename to target/riscv/tcg/translate.c
diff --git a/target/riscv/vector_helper.c b/target/riscv/tcg/vector_helper.c
similarity index 100%
rename from target/riscv/vector_helper.c
rename to target/riscv/tcg/vector_helper.c
diff --git a/target/riscv/zce_helper.c b/target/riscv/tcg/zce_helper.c
similarity index 100%
rename from target/riscv/zce_helper.c
rename to target/riscv/tcg/zce_helper.c
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index 8967dfaded..8ef47f43f9 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -1,34 +1,18 @@
-# FIXME extra_args should accept files()
-gen = [
-  decodetree.process('insn16.decode', extra_args: 
['--static-decode=decode_insn16', '--insnwidth=16']),
-

[PATCH v2 05/16] target/riscv: Move sysemu-specific files to target/riscv/sysemu/

2023-07-03 Thread Philippe Mathieu-Daudé

Move sysemu-specific files to a new 'sysemu' sub-directory,
adapt meson rules.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Daniel Henrique Barboza 
Acked-by: Alistair Francis 
---
 target/riscv/cpu.h |  2 +-
 target/riscv/{ => sysemu}/instmap.h|  0
 target/riscv/{ => sysemu}/kvm_riscv.h  |  0
 target/riscv/{ => sysemu}/pmp.h|  0
 target/riscv/{ => sysemu}/pmu.h|  0
 target/riscv/{ => sysemu}/time_helper.h|  0
 hw/riscv/virt.c|  2 +-
 target/riscv/cpu.c |  6 ++---
 target/riscv/cpu_helper.c  |  4 +--
 target/riscv/csr.c |  4 +--
 target/riscv/{ => sysemu}/arch_dump.c  |  0
 target/riscv/sysemu/kvm-stub.c | 30 ++
 target/riscv/{ => sysemu}/kvm.c|  0
 target/riscv/{ => sysemu}/machine.c|  0
 target/riscv/{ => sysemu}/monitor.c|  0
 target/riscv/{ => sysemu}/pmp.c|  0
 target/riscv/{ => sysemu}/pmu.c|  0
 target/riscv/{ => sysemu}/riscv-qmp-cmds.c |  0
 target/riscv/{ => sysemu}/time_helper.c|  0
 target/riscv/meson.build   | 13 +++---
 target/riscv/sysemu/meson.build| 11 
 21 files changed, 54 insertions(+), 18 deletions(-)
 rename target/riscv/{ => sysemu}/instmap.h (100%)
 rename target/riscv/{ => sysemu}/kvm_riscv.h (100%)
 rename target/riscv/{ => sysemu}/pmp.h (100%)
 rename target/riscv/{ => sysemu}/pmu.h (100%)
 rename target/riscv/{ => sysemu}/time_helper.h (100%)
 rename target/riscv/{ => sysemu}/arch_dump.c (100%)
 create mode 100644 target/riscv/sysemu/kvm-stub.c
 rename target/riscv/{ => sysemu}/kvm.c (100%)
 rename target/riscv/{ => sysemu}/machine.c (100%)
 rename target/riscv/{ => sysemu}/monitor.c (100%)
 rename target/riscv/{ => sysemu}/pmp.c (100%)
 rename target/riscv/{ => sysemu}/pmu.c (100%)
 rename target/riscv/{ => sysemu}/riscv-qmp-cmds.c (100%)
 rename target/riscv/{ => sysemu}/time_helper.c (100%)
 create mode 100644 target/riscv/sysemu/meson.build

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 3081603464..00a4842d84 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -88,7 +88,7 @@ typedef enum {
 #define MAX_RISCV_PMPS (16)
 
 #if !defined(CONFIG_USER_ONLY)
-#include "pmp.h"
+#include "sysemu/pmp.h"
 #include "debug.h"
 #endif
 
diff --git a/target/riscv/instmap.h b/target/riscv/sysemu/instmap.h
similarity index 100%
rename from target/riscv/instmap.h
rename to target/riscv/sysemu/instmap.h
diff --git a/target/riscv/kvm_riscv.h b/target/riscv/sysemu/kvm_riscv.h
similarity index 100%
rename from target/riscv/kvm_riscv.h
rename to target/riscv/sysemu/kvm_riscv.h
diff --git a/target/riscv/pmp.h b/target/riscv/sysemu/pmp.h
similarity index 100%
rename from target/riscv/pmp.h
rename to target/riscv/sysemu/pmp.h
diff --git a/target/riscv/pmu.h b/target/riscv/sysemu/pmu.h
similarity index 100%
rename from target/riscv/pmu.h
rename to target/riscv/sysemu/pmu.h
diff --git a/target/riscv/time_helper.h b/target/riscv/sysemu/time_helper.h
similarity index 100%
rename from target/riscv/time_helper.h
rename to target/riscv/sysemu/time_helper.h
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 8ff4b5fd71..8f6b63ad07 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -30,7 +30,7 @@
 #include "hw/char/serial.h"
 #include "target/riscv/cpu.h"
 #include "hw/core/sysbus-fdt.h"
-#include "target/riscv/pmu.h"
+#include "target/riscv/sysemu/pmu.h"
 #include "hw/riscv/riscv_hart.h"
 #include "hw/riscv/virt.h"
 #include "hw/riscv/boot.h"
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 78ab61c274..cd01af3595 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -24,10 +24,10 @@
 #include "cpu.h"
 #include "cpu_vendorid.h"
 #ifndef CONFIG_USER_ONLY
-#include "pmu.h"
-#include "time_helper.h"
+#include "sysemu/pmu.h"
+#include "sysemu/time_helper.h"
 #include "sysemu/kvm.h"
-#include "kvm_riscv.h"
+#include "sysemu/kvm_riscv.h"
 #endif
 #include "internals.h"
 #include "exec/exec-all.h"
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index e8b7f70be3..0adde26321 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -22,9 +22,9 @@
 #include "qemu/main-loop.h"
 #include "cpu.h"
 #include "internals.h"
-#include "pmu.h"
+#include "sysemu/pmu.h"
 #include "exec/exec-all.h"
-#include "instmap.h"
+#include "sysemu/instmap.h"
 #include "tcg/tcg-op.h"
 #include "trace.h"
 #include "semihosting/common-semi.h"
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index e5737dcf58..29151429ee 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -22,8 +22,8 @@
 #include "qemu/timer.h"
 #include "cpu.h"
 #ifndef CONFIG_USER_ONLY
-#include "pmu.h"
-#include "time_helper.h"
+#include "sysemu/pmu.h"
+#include "sysemu/time_helper.h"
 #endif
 #include "qemu/main-loop.h"
 #include "exec/exec-all.h"
diff --git a/target/riscv/arch_dump.c b/target/

[PATCH v2 09/16] target/riscv: Expose some 'trigger' prototypes from debug.c

2023-07-03 Thread Philippe Mathieu-Daudé

We want to extract TCG-specific code from debug.c, but some
functions call get_trigger_type() / do_trigger_action().
Expose these prototypes in "debug.h".

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/riscv/debug.h | 4 
 target/riscv/debug.c | 5 ++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/target/riscv/debug.h b/target/riscv/debug.h
index c471748d5a..65cd45b8f3 100644
--- a/target/riscv/debug.h
+++ b/target/riscv/debug.h
@@ -147,4 +147,8 @@ void riscv_trigger_init(CPURISCVState *env);
 
 bool riscv_itrigger_enabled(CPURISCVState *env);
 void riscv_itrigger_update_priv(CPURISCVState *env);
+
+target_ulong get_trigger_type(CPURISCVState *env, target_ulong trigger_index);
+void do_trigger_action(CPURISCVState *env, target_ulong trigger_index);
+
 #endif /* RISCV_DEBUG_H */
diff --git a/target/riscv/debug.c b/target/riscv/debug.c
index 75ee1c4971..5676f2c57e 100644
--- a/target/riscv/debug.c
+++ b/target/riscv/debug.c
@@ -88,8 +88,7 @@ static inline target_ulong extract_trigger_type(CPURISCVState 
*env,
 }
 }
 
-static inline target_ulong get_trigger_type(CPURISCVState *env,
-target_ulong trigger_index)
+target_ulong get_trigger_type(CPURISCVState *env, target_ulong trigger_index)
 {
 return extract_trigger_type(env, env->tdata1[trigger_index]);
 }
@@ -217,7 +216,7 @@ static inline void warn_always_zero_bit(target_ulong val, 
target_ulong mask,
 }
 }
 
-static void do_trigger_action(CPURISCVState *env, target_ulong trigger_index)
+void do_trigger_action(CPURISCVState *env, target_ulong trigger_index)
 {
 trigger_action_t action = get_trigger_action(env, trigger_index);
 
-- 
2.38.1

[PATCH v2 16/16] gitlab-ci.d/crossbuilds: Add KVM riscv64 cross-build jobs

2023-07-03 Thread Philippe Mathieu-Daudé

Add a new job to cross-build the riscv64 target without
the TCG accelerator (IOW: only KVM accelerator enabled).

Signed-off-by: Philippe Mathieu-Daudé 
---
 .gitlab-ci.d/crossbuilds.yml | 8 
 1 file changed, 8 insertions(+)

diff --git a/.gitlab-ci.d/crossbuilds.yml b/.gitlab-ci.d/crossbuilds.yml
index b6ec99ecd1..588ef4ebcb 100644
--- a/.gitlab-ci.d/crossbuilds.yml
+++ b/.gitlab-ci.d/crossbuilds.yml
@@ -129,6 +129,14 @@ cross-riscv64-user:
   variables:
 IMAGE: debian-riscv64-cross
 
+cross-riscv64-kvm-only:
+  extends: .cross_accel_build_job
+  needs:
+job: riscv64-debian-cross-container
+  variables:
+IMAGE: debian-riscv64-cross
+EXTRA_CONFIGURE_OPTS: --disable-tcg --without-default-features
+
 cross-s390x-system:
   extends: .cross_system_build_job
   needs:
-- 
2.38.1

[PATCH v2 08/16] target/riscv: Move TCG-specific cpu_get_tb_cpu_state() to tcg/cpu.c

2023-07-03 Thread Philippe Mathieu-Daudé

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/riscv/cpu_helper.c| 84 ---
 target/riscv/tcg/cpu.c   | 98 
 target/riscv/tcg/meson.build |  1 +
 3 files changed, 99 insertions(+), 84 deletions(-)
 create mode 100644 target/riscv/tcg/cpu.c

diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 597c47bc56..6f8778c6d3 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -64,90 +64,6 @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
 #endif
 }
 
-void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc,
-  uint64_t *cs_base, uint32_t *pflags)
-{
-CPUState *cs = env_cpu(env);
-RISCVCPU *cpu = RISCV_CPU(cs);
-RISCVExtStatus fs, vs;
-uint32_t flags = 0;
-
-*pc = env->xl == MXL_RV32 ? env->pc & UINT32_MAX : env->pc;
-*cs_base = 0;
-
-if (cpu->cfg.ext_zve32f) {
-/*
- * If env->vl equals to VLMAX, we can use generic vector operation
- * expanders (GVEC) to accerlate the vector operations.
- * However, as LMUL could be a fractional number. The maximum
- * vector size can be operated might be less than 8 bytes,
- * which is not supported by GVEC. So we set vl_eq_vlmax flag to true
- * only when maxsz >= 8 bytes.
- */
-uint32_t vlmax = vext_get_vlmax(cpu, env->vtype);
-uint32_t sew = FIELD_EX64(env->vtype, VTYPE, VSEW);
-uint32_t maxsz = vlmax << sew;
-bool vl_eq_vlmax = (env->vstart == 0) && (vlmax == env->vl) &&
-   (maxsz >= 8);
-flags = FIELD_DP32(flags, TB_FLAGS, VILL, env->vill);
-flags = FIELD_DP32(flags, TB_FLAGS, SEW, sew);
-flags = FIELD_DP32(flags, TB_FLAGS, LMUL,
-   FIELD_EX64(env->vtype, VTYPE, VLMUL));
-flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
-flags = FIELD_DP32(flags, TB_FLAGS, VTA,
-   FIELD_EX64(env->vtype, VTYPE, VTA));
-flags = FIELD_DP32(flags, TB_FLAGS, VMA,
-   FIELD_EX64(env->vtype, VTYPE, VMA));
-flags = FIELD_DP32(flags, TB_FLAGS, VSTART_EQ_ZERO, env->vstart == 0);
-} else {
-flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
-}
-
-#ifdef CONFIG_USER_ONLY
-fs = EXT_STATUS_DIRTY;
-vs = EXT_STATUS_DIRTY;
-#else
-flags = FIELD_DP32(flags, TB_FLAGS, PRIV, env->priv);
-
-flags |= cpu_mmu_index(env, 0);
-fs = get_field(env->mstatus, MSTATUS_FS);
-vs = get_field(env->mstatus, MSTATUS_VS);
-
-if (env->virt_enabled) {
-flags = FIELD_DP32(flags, TB_FLAGS, VIRT_ENABLED, 1);
-/*
- * Merge DISABLED and !DIRTY states using MIN.
- * We will set both fields when dirtying.
- */
-fs = MIN(fs, get_field(env->mstatus_hs, MSTATUS_FS));
-vs = MIN(vs, get_field(env->mstatus_hs, MSTATUS_VS));
-}
-
-/* With Zfinx, floating point is enabled/disabled by Smstateen. */
-if (!riscv_has_ext(env, RVF)) {
-fs = (smstateen_acc_ok(env, 0, SMSTATEEN0_FCSR) == RISCV_EXCP_NONE)
- ? EXT_STATUS_DIRTY : EXT_STATUS_DISABLED;
-}
-
-if (cpu->cfg.debug && !icount_enabled()) {
-flags = FIELD_DP32(flags, TB_FLAGS, ITRIGGER, env->itrigger_enabled);
-}
-#endif
-
-flags = FIELD_DP32(flags, TB_FLAGS, FS, fs);
-flags = FIELD_DP32(flags, TB_FLAGS, VS, vs);
-flags = FIELD_DP32(flags, TB_FLAGS, XL, env->xl);
-flags = FIELD_DP32(flags, TB_FLAGS, AXL, cpu_address_xl(env));
-if (env->cur_pmmask != 0) {
-flags = FIELD_DP32(flags, TB_FLAGS, PM_MASK_ENABLED, 1);
-}
-if (env->cur_pmbase != 0) {
-flags = FIELD_DP32(flags, TB_FLAGS, PM_BASE_ENABLED, 1);
-}
-
-*pflags = flags;
-}
-
 void riscv_cpu_update_mask(CPURISCVState *env)
 {
 target_ulong mask = 0, base = 0;
diff --git a/target/riscv/tcg/cpu.c b/target/riscv/tcg/cpu.c
new file mode 100644
index 00..2ae6919b80
--- /dev/null
+++ b/target/riscv/tcg/cpu.c
@@ -0,0 +1,98 @@
+/*
+ * RISC-V CPU helpers (TCG specific)
+ *
+ * Copyright (c) 2016-2017 Sagar Karandikar, sag...@eecs.berkeley.edu
+ * Copyright (c) 2017-2018 SiFive, Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#ifndef CONFIG_USER_ONLY
+#include "sysemu/cpu-timers.h"
+#endif
+
+void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc,
+  uint64_t *cs_base, uint32_t *pflags)
+{
+CPUState *cs = env_cpu(env);
+RISCVCPU *cpu = RISCV_CPU(cs);
+RISCVExtStatus fs, vs;
+uint32_t flags = 0;
+
+*pc = env->xl == MXL_RV32 ? env->pc & UINT32_MAX : env->pc;
+*cs_base = 0;
+
+if (cpu->cfg.ext_zve32f) {
+/*
+ * If env->vl equals to VLMAX, we can use generic vector operation
+ * expanders (GVEC) to accerlate the vector operations.
+ * However, as LMUL could be a fractional number

[PATCH v2 10/16] target/riscv: Extract TCG-specific code from debug.c

2023-07-03 Thread Philippe Mathieu-Daudé

Extract TCG-specific code from debug.c to tcg/sysemu/debug.c,
restrict the prototypes to TCG, adapt meson rules.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/riscv/debug.h|   2 +
 target/riscv/debug.c| 148 -
 target/riscv/tcg/sysemu/debug.c | 165 
 target/riscv/tcg/meson.build|   2 +
 target/riscv/tcg/sysemu/meson.build |   3 +
 5 files changed, 172 insertions(+), 148 deletions(-)
 create mode 100644 target/riscv/tcg/sysemu/debug.c
 create mode 100644 target/riscv/tcg/sysemu/meson.build

diff --git a/target/riscv/debug.h b/target/riscv/debug.h
index 65cd45b8f3..0b3bdd5be1 100644
--- a/target/riscv/debug.h
+++ b/target/riscv/debug.h
@@ -139,9 +139,11 @@ void tdata_csr_write(CPURISCVState *env, int tdata_index, 
target_ulong val);
 
 target_ulong tinfo_csr_read(CPURISCVState *env);
 
+#ifdef CONFIG_TCG
 void riscv_cpu_debug_excp_handler(CPUState *cs);
 bool riscv_cpu_debug_check_breakpoint(CPUState *cs);
 bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp);
+#endif
 
 void riscv_trigger_init(CPURISCVState *env);
 
diff --git a/target/riscv/debug.c b/target/riscv/debug.c
index 5676f2c57e..45a2605d8a 100644
--- a/target/riscv/debug.c
+++ b/target/riscv/debug.c
@@ -754,154 +754,6 @@ target_ulong tinfo_csr_read(CPURISCVState *env)
BIT(TRIGGER_TYPE_AD_MATCH6);
 }
 
-void riscv_cpu_debug_excp_handler(CPUState *cs)
-{
-RISCVCPU *cpu = RISCV_CPU(cs);
-CPURISCVState *env = &cpu->env;
-
-if (cs->watchpoint_hit) {
-if (cs->watchpoint_hit->flags & BP_CPU) {
-do_trigger_action(env, DBG_ACTION_BP);
-}
-} else {
-if (cpu_breakpoint_test(cs, env->pc, BP_CPU)) {
-do_trigger_action(env, DBG_ACTION_BP);
-}
-}
-}
-
-bool riscv_cpu_debug_check_breakpoint(CPUState *cs)
-{
-RISCVCPU *cpu = RISCV_CPU(cs);
-CPURISCVState *env = &cpu->env;
-CPUBreakpoint *bp;
-target_ulong ctrl;
-target_ulong pc;
-int trigger_type;
-int i;
-
-QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-for (i = 0; i < RV_MAX_TRIGGERS; i++) {
-trigger_type = get_trigger_type(env, i);
-
-switch (trigger_type) {
-case TRIGGER_TYPE_AD_MATCH:
-/* type 2 trigger cannot be fired in VU/VS mode */
-if (env->virt_enabled) {
-return false;
-}
-
-ctrl = env->tdata1[i];
-pc = env->tdata2[i];
-
-if ((ctrl & TYPE2_EXEC) && (bp->pc == pc)) {
-/* check U/S/M bit against current privilege level */
-if ((ctrl >> 3) & BIT(env->priv)) {
-return true;
-}
-}
-break;
-case TRIGGER_TYPE_AD_MATCH6:
-ctrl = env->tdata1[i];
-pc = env->tdata2[i];
-
-if ((ctrl & TYPE6_EXEC) && (bp->pc == pc)) {
-if (env->virt_enabled) {
-/* check VU/VS bit against current privilege level */
-if ((ctrl >> 23) & BIT(env->priv)) {
-return true;
-}
-} else {
-/* check U/S/M bit against current privilege level */
-if ((ctrl >> 3) & BIT(env->priv)) {
-return true;
-}
-}
-}
-break;
-default:
-/* other trigger types are not supported or irrelevant */
-break;
-}
-}
-}
-
-return false;
-}
-
-bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp)
-{
-RISCVCPU *cpu = RISCV_CPU(cs);
-CPURISCVState *env = &cpu->env;
-target_ulong ctrl;
-target_ulong addr;
-int trigger_type;
-int flags;
-int i;
-
-for (i = 0; i < RV_MAX_TRIGGERS; i++) {
-trigger_type = get_trigger_type(env, i);
-
-switch (trigger_type) {
-case TRIGGER_TYPE_AD_MATCH:
-/* type 2 trigger cannot be fired in VU/VS mode */
-if (env->virt_enabled) {
-return false;
-}
-
-ctrl = env->tdata1[i];
-addr = env->tdata2[i];
-flags = 0;
-
-if (ctrl & TYPE2_LOAD) {
-flags |= BP_MEM_READ;
-}
-if (ctrl & TYPE2_STORE) {
-flags |= BP_MEM_WRITE;
-}
-
-if ((wp->flags & flags) && (wp->vaddr == addr)) {
-/* check U/S/M bit against current privilege level */
-if ((ctrl >> 3) & BIT(env->priv)) {
-return true;
-}
-}
-break;
-case TRIGGER_TYPE_AD_MATCH6:
-ctrl = env->tdata1[i];
-addr = env->tda

[RFC PATCH v2 13/16] target/riscv: Move TCG/sysemu-specific code to tcg/sysemu/cpu_helper.c

2023-07-03 Thread Philippe Mathieu-Daudé

Move TCG/sysemu-specific code and restrict the corresponding
prototypes to TCG, adapting meson rules.

Signed-off-by: Philippe Mathieu-Daudé 
---
RFC due to riscv_cpu_get_phys_page_debug()
---
 target/riscv/cpu.h   |  15 +-
 target/riscv/cpu_helper.c| 745 --
 target/riscv/tcg/sysemu/cpu_helper.c | 765 +++
 target/riscv/tcg/tcg-stub.c  |   6 +
 target/riscv/tcg/sysemu/meson.build  |   1 +
 5 files changed, 781 insertions(+), 751 deletions(-)
 create mode 100644 target/riscv/tcg/sysemu/cpu_helper.c

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index f9754013a8..42bd7efe4c 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -429,12 +429,6 @@ void riscv_cpu_set_geilen(CPURISCVState *env, target_ulong 
geilen);
 bool riscv_cpu_vector_enabled(CPURISCVState *env);
 void riscv_cpu_set_virt_enabled(CPURISCVState *env, bool enable);
 int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch);
-G_NORETURN void  riscv_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
-   MMUAccessType access_type,
-   int mmu_idx, uintptr_t retaddr);
-bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
-MMUAccessType access_type, int mmu_idx,
-bool probe, uintptr_t retaddr);
 char *riscv_isa_string(RISCVCPU *cpu);
 void riscv_cpu_list(void);
 void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp);
@@ -444,11 +438,20 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, 
Error **errp);
 
 #ifndef CONFIG_USER_ONLY
 void riscv_cpu_do_interrupt(CPUState *cpu);
+#ifdef CONFIG_TCG
+bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+MMUAccessType access_type, int mmu_idx,
+bool probe, uintptr_t retaddr);
 void riscv_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
  vaddr addr, unsigned size,
  MMUAccessType access_type,
  int mmu_idx, MemTxAttrs attrs,
  MemTxResult response, uintptr_t retaddr);
+G_NORETURN void  riscv_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
+   MMUAccessType access_type,
+   int mmu_idx, uintptr_t retaddr);
+#endif /* CONFIG_TCG */
+
 hwaddr riscv_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 bool riscv_cpu_exec_interrupt(CPUState *cs, int interrupt_request);
 void riscv_cpu_swap_hypervisor_regs(CPURISCVState *env);
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index e73cf56e5c..f1d0cd1e64 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -331,69 +331,6 @@ int riscv_cpu_vsirq_pending(CPURISCVState *env)
 irqs >> 1, env->hviprio);
 }
 
-static int riscv_cpu_local_irq_pending(CPURISCVState *env)
-{
-int virq;
-uint64_t irqs, pending, mie, hsie, vsie;
-
-/* Determine interrupt enable state of all privilege modes */
-if (env->virt_enabled) {
-mie = 1;
-hsie = 1;
-vsie = (env->priv < PRV_S) ||
-   (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_SIE));
-} else {
-mie = (env->priv < PRV_M) ||
-  (env->priv == PRV_M && get_field(env->mstatus, MSTATUS_MIE));
-hsie = (env->priv < PRV_S) ||
-   (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_SIE));
-vsie = 0;
-}
-
-/* Determine all pending interrupts */
-pending = riscv_cpu_all_pending(env);
-
-/* Check M-mode interrupts */
-irqs = pending & ~env->mideleg & -mie;
-if (irqs) {
-return riscv_cpu_pending_to_irq(env, IRQ_M_EXT, IPRIO_DEFAULT_M,
-irqs, env->miprio);
-}
-
-/* Check HS-mode interrupts */
-irqs = pending & env->mideleg & ~env->hideleg & -hsie;
-if (irqs) {
-return riscv_cpu_pending_to_irq(env, IRQ_S_EXT, IPRIO_DEFAULT_S,
-irqs, env->siprio);
-}
-
-/* Check VS-mode interrupts */
-irqs = pending & env->mideleg & env->hideleg & -vsie;
-if (irqs) {
-virq = riscv_cpu_pending_to_irq(env, IRQ_S_EXT, IPRIO_DEFAULT_S,
-irqs >> 1, env->hviprio);
-return (virq <= 0) ? virq : virq + 1;
-}
-
-/* Indicate no pending interrupt */
-return RISCV_EXCP_NONE;
-}
-
-bool riscv_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
-{
-if (interrupt_request & CPU_INTERRUPT_HARD) {
-RISCVCPU *cpu = RISCV_CPU(cs);
-CPURISCVState *env = &cpu->env;
-int interruptno = riscv_cpu_local_irq_pending(env);
-if (interruptno >= 0) {
-cs->exception_index = RISCV_EXCP_INT_FLAG | inte

[PATCH v2 11/16] target/riscv: Move sysemu-specific debug files to target/riscv/sysemu/

2023-07-03 Thread Philippe Mathieu-Daudé

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/riscv/cpu.h| 2 +-
 target/riscv/{ => sysemu}/debug.h | 0
 target/riscv/cpu_helper.c | 2 +-
 target/riscv/{ => sysemu}/debug.c | 0
 target/riscv/meson.build  | 4 
 target/riscv/sysemu/meson.build   | 1 +
 6 files changed, 3 insertions(+), 6 deletions(-)
 rename target/riscv/{ => sysemu}/debug.h (100%)
 rename target/riscv/{ => sysemu}/debug.c (100%)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index e6a8087022..f9754013a8 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -89,7 +89,7 @@ typedef enum {
 
 #if !defined(CONFIG_USER_ONLY)
 #include "sysemu/pmp.h"
-#include "debug.h"
+#include "sysemu/debug.h"
 #endif
 
 #define RV_VLEN_MAX 1024
diff --git a/target/riscv/debug.h b/target/riscv/sysemu/debug.h
similarity index 100%
rename from target/riscv/debug.h
rename to target/riscv/sysemu/debug.h
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 6f8778c6d3..6c773000a5 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -32,7 +32,7 @@
 #include "sysemu/cpu-timers.h"
 #endif
 #include "cpu_bits.h"
-#include "debug.h"
+#include "sysemu/debug.h"
 #include "tcg/oversized-guest.h"
 
 int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
diff --git a/target/riscv/debug.c b/target/riscv/sysemu/debug.c
similarity index 100%
rename from target/riscv/debug.c
rename to target/riscv/sysemu/debug.c
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index 8ef47f43f9..49cdcde679 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -8,10 +8,6 @@ riscv_ss.add(files(
   'gdbstub.c',
 ))
 
-riscv_system_ss.add(files(
-  'debug.c',
-))
-
 subdir('tcg')
 subdir('sysemu')
 
diff --git a/target/riscv/sysemu/meson.build b/target/riscv/sysemu/meson.build
index 64de0256a5..e902ba2dad 100644
--- a/target/riscv/sysemu/meson.build
+++ b/target/riscv/sysemu/meson.build
@@ -1,5 +1,6 @@
 riscv_system_ss.add(files(
   'arch_dump.c',
+  'debug.c',
   'machine.c',
   'monitor.c',
   'pmp.c',
-- 
2.38.1

[PATCH v2 06/16] target/riscv: Restrict riscv_cpu_do_interrupt() to sysemu

2023-07-03 Thread Philippe Mathieu-Daudé

riscv_cpu_do_interrupt() is not reachable on user emulation.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/riscv/cpu.h| 5 +++--
 target/riscv/cpu_helper.c | 7 ++-
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 00a4842d84..e6a8087022 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -411,7 +411,6 @@ extern const char * const riscv_int_regnamesh[];
 extern const char * const riscv_fpr_regnames[];
 
 const char *riscv_cpu_get_trap_name(target_ulong cause, bool async);
-void riscv_cpu_do_interrupt(CPUState *cpu);
 int riscv_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
int cpuid, DumpState *s);
 int riscv_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
@@ -444,6 +443,7 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error 
**errp);
 #define cpu_mmu_index riscv_cpu_mmu_index
 
 #ifndef CONFIG_USER_ONLY
+void riscv_cpu_do_interrupt(CPUState *cpu);
 void riscv_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
  vaddr addr, unsigned size,
  MMUAccessType access_type,
@@ -467,7 +467,8 @@ void riscv_cpu_set_aia_ireg_rmw_fn(CPURISCVState *env, 
uint32_t priv,
void *rmw_fn_arg);
 
 RISCVException smstateen_acc_ok(CPURISCVState *env, int index, uint64_t bit);
-#endif
+#endif /* !CONFIG_USER_ONLY */
+
 void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv);
 
 void riscv_translate_init(void);
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 0adde26321..597c47bc56 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -1579,7 +1579,6 @@ static target_ulong riscv_transformed_insn(CPURISCVState 
*env,
 
 return xinsn;
 }
-#endif /* !CONFIG_USER_ONLY */
 
 /*
  * Handle Traps
@@ -1589,8 +1588,6 @@ static target_ulong riscv_transformed_insn(CPURISCVState 
*env,
  */
 void riscv_cpu_do_interrupt(CPUState *cs)
 {
-#if !defined(CONFIG_USER_ONLY)
-
 RISCVCPU *cpu = RISCV_CPU(cs);
 CPURISCVState *env = &cpu->env;
 bool write_gva = false;
@@ -1783,6 +1780,6 @@ void riscv_cpu_do_interrupt(CPUState *cs)
 
 env->two_stage_lookup = false;
 env->two_stage_indirect_lookup = false;
-#endif
-cs->exception_index = RISCV_EXCP_NONE; /* mark handled to qemu */
 }
+
+#endif /* !CONFIG_USER_ONLY */
-- 
2.38.1

[PATCH v2 02/16] target/riscv: Remove unused 'instmap.h' header in translate.c

2023-07-03 Thread Philippe Mathieu-Daudé

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Daniel Henrique Barboza 
Acked-by: Alistair Francis 
---
 target/riscv/translate.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 621dd99241..e3a6697cd8 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -30,7 +30,6 @@
 #include "exec/log.h"
 #include "semihosting/semihost.h"
 
-#include "instmap.h"
 #include "internals.h"
 
 #define HELPER_H "helper.h"
-- 
2.38.1

[PATCH v2 00/16] target/riscv: Allow building without TCG (KVM-only so far)

2023-07-03 Thread Philippe Mathieu-Daudé

v2: Rebased on alistair23/riscv-to-apply.next

Patch #1-#5 reviewed.

Philippe Mathieu-Daudé (16):
  target/riscv: Remove unuseful KVM stubs
  target/riscv: Remove unused 'instmap.h' header in translate.c
  target/riscv: Restrict sysemu specific header to user emulation
  target/riscv: Restrict 'rv128' machine to TCG accelerator
  target/riscv: Move sysemu-specific files to target/riscv/sysemu/
  target/riscv: Restrict riscv_cpu_do_interrupt() to sysemu
  target/riscv: Move TCG-specific files to target/riscv/tcg/
  target/riscv: Move TCG-specific cpu_get_tb_cpu_state() to tcg/cpu.c
  target/riscv: Expose some 'trigger' prototypes from debug.c
  target/riscv: Extract TCG-specific code from debug.c
  target/riscv: Move sysemu-specific debug files to target/riscv/sysemu/
  target/riscv: Expose riscv_cpu_pending_to_irq() from cpu_helper.c
  target/riscv: Move TCG/sysemu-specific code to tcg/sysemu/cpu_helper.c
  target/riscv: Move sysemu-specific code to sysemu/cpu_helper.c
  target/riscv: Restrict TCG-specific prototype declarations
  gitlab-ci.d/crossbuilds: Add KVM riscv64 cross-build jobs

 target/riscv/cpu.h|   27 +-
 target/riscv/internals.h  |4 +
 target/riscv/{ => sysemu}/debug.h |6 +
 target/riscv/{ => sysemu}/instmap.h   |0
 target/riscv/{ => sysemu}/kvm_riscv.h |0
 target/riscv/{ => sysemu}/pmp.h   |0
 target/riscv/{ => sysemu}/pmu.h   |0
 target/riscv/{ => sysemu}/time_helper.h   |0
 target/riscv/{ => tcg}/XVentanaCondOps.decode |0
 target/riscv/{ => tcg}/insn16.decode  |0
 target/riscv/{ => tcg}/insn32.decode  |0
 target/riscv/{ => tcg}/xthead.decode  |0
 hw/riscv/virt.c   |2 +-
 target/riscv/cpu.c|   33 +-
 target/riscv/cpu_helper.c | 1692 +
 target/riscv/csr.c|6 +-
 target/riscv/{ => sysemu}/arch_dump.c |0
 target/riscv/sysemu/cpu_helper.c  |  863 +
 target/riscv/{ => sysemu}/debug.c |  153 +-
 target/riscv/{ => sysemu}/kvm-stub.c  |0
 target/riscv/{ => sysemu}/kvm.c   |4 +-
 target/riscv/{ => sysemu}/machine.c   |0
 target/riscv/{ => sysemu}/monitor.c   |0
 target/riscv/{ => sysemu}/pmp.c   |0
 target/riscv/{ => sysemu}/pmu.c   |0
 target/riscv/{ => sysemu}/riscv-qmp-cmds.c|0
 target/riscv/{ => sysemu}/time_helper.c   |0
 target/riscv/{ => tcg}/bitmanip_helper.c  |0
 target/riscv/tcg/cpu.c|   98 +
 target/riscv/{ => tcg}/crypto_helper.c|0
 target/riscv/{ => tcg}/fpu_helper.c   |0
 target/riscv/{ => tcg}/m128_helper.c  |0
 target/riscv/{ => tcg}/op_helper.c|0
 target/riscv/tcg/sysemu/cpu_helper.c  |  765 
 target/riscv/tcg/sysemu/debug.c   |  165 ++
 target/riscv/tcg/tcg-stub.c   |   31 +
 target/riscv/{ => tcg}/translate.c|1 -
 target/riscv/{ => tcg}/vector_helper.c|0
 target/riscv/{ => tcg}/zce_helper.c   |0
 .gitlab-ci.d/crossbuilds.yml  |8 +
 target/riscv/meson.build  |   33 +-
 target/riscv/sysemu/meson.build   |   13 +
 target/riscv/tcg/meson.build  |   22 +
 target/riscv/tcg/sysemu/meson.build   |4 +
 44 files changed, 2037 insertions(+), 1893 deletions(-)
 rename target/riscv/{ => sysemu}/debug.h (96%)
 rename target/riscv/{ => sysemu}/instmap.h (100%)
 rename target/riscv/{ => sysemu}/kvm_riscv.h (100%)
 rename target/riscv/{ => sysemu}/pmp.h (100%)
 rename target/riscv/{ => sysemu}/pmu.h (100%)
 rename target/riscv/{ => sysemu}/time_helper.h (100%)
 rename target/riscv/{ => tcg}/XVentanaCondOps.decode (100%)
 rename target/riscv/{ => tcg}/insn16.decode (100%)
 rename target/riscv/{ => tcg}/insn32.decode (100%)
 rename target/riscv/{ => tcg}/xthead.decode (100%)
 rename target/riscv/{ => sysemu}/arch_dump.c (100%)
 create mode 100644 target/riscv/sysemu/cpu_helper.c
 rename target/riscv/{ => sysemu}/debug.c (83%)
 rename target/riscv/{ => sysemu}/kvm-stub.c (100%)
 rename target/riscv/{ => sysemu}/kvm.c (99%)
 rename target/riscv/{ => sysemu}/machine.c (100%)
 rename target/riscv/{ => sysemu}/monitor.c (100%)
 rename target/riscv/{ => sysemu}/pmp.c (100%)
 rename target/riscv/{ => sysemu}/pmu.c (100%)
 rename target/riscv/{ => sysemu}/riscv-qmp-cmds.c (100%)
 rename target/riscv/{ => sysemu}/time_helper.c (100%)
 rename target/riscv/{ => tcg}/bitmanip_helper.c (100%)
 create mode 100644 target/riscv/tcg/cpu.c
 rename target/riscv/{ => tcg}/crypto_helper.c (100%)
 rename target/riscv/{ => tcg}/fpu_helper.c (100%)
 rename target/riscv/{ => tcg}/m128_helper.c (100%)
 rename target/riscv/{ => tcg}/op_helper.c (1

Re: [RFC v3 05/10] linux-user: Implement native-bypass option support

2023-07-03 Thread Alex Bennée



Yeqi Fu  writes:

> This commit implements the -native-bypass support in linux-user. The
> native_calls_enabled() function can be true only when the
> '-native-bypass' option is given.
>
> Signed-off-by: Yeqi Fu 
> ---
>  linux-user/main.c | 36 
>  1 file changed, 36 insertions(+)
>
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 5e6b2e1714..98e31c77d5 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -60,6 +60,13 @@
>  #include "semihosting/semihost.h"
>  #endif
>  
> +#if defined(CONFIG_NATIVE_CALL)
> +#include "native/native-defs.h"
> +
> +static const char *native_lib;
> +bool native_bypass_enabled;

This bool feels redundant if we can check for a non-null native-lib. You
could certainly expose a function though:

bool native_bypass_enabled() {
 return native_lib ? true : false;
}

?

> +#endif
> +
>  #ifndef AT_FLAGS_PRESERVE_ARGV0
>  #define AT_FLAGS_PRESERVE_ARGV0_BIT 0
>  #define AT_FLAGS_PRESERVE_ARGV0 (1 << AT_FLAGS_PRESERVE_ARGV0_BIT)
> @@ -125,6 +132,7 @@ static void usage(int exitcode);
>  static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX;
>  const char *qemu_uname_release;
>  
> +

rm whitespace

>  #if !defined(TARGET_DEFAULT_STACK_SIZE)
>  /* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so
> we allocate a bigger stack. Need a better solution, for example
> @@ -293,6 +301,18 @@ static void handle_arg_set_env(const char *arg)
>  free(r);
>  }
>  
> +#if defined(CONFIG_NATIVE_CALL)
> +static void handle_arg_native_bypass(const char *arg)
> +{
> +if (access(arg, F_OK) != 0) {
> +fprintf(stderr, "native library %s does not exist\n", arg);
> +exit(EXIT_FAILURE);
> +}
> +native_lib = arg;
> +native_bypass_enabled = true;
> +}
> +#endif
> +
>  static void handle_arg_unset_env(const char *arg)
>  {
>  char *r, *p, *token;
> @@ -522,6 +542,10 @@ static const struct qemu_argument arg_table[] = {
>   "",   "Generate a /tmp/perf-${pid}.map file for perf"},
>  {"jitdump","QEMU_JITDUMP", false, handle_arg_jitdump,
>   "",   "Generate a jit-${pid}.dump file for perf"},
> +#if defined(CONFIG_NATIVE_CALL)
> +{"native-bypass", "QEMU_NATIVE_BYPASS", true, handle_arg_native_bypass,
> + "",   "native bypass for library calls in user mode only."},
> +#endif
>  {NULL, NULL, false, NULL, NULL, NULL}
>  };
>  
> @@ -826,6 +850,18 @@ int main(int argc, char **argv, char **envp)
>  }
>  }
>  
> +#if defined(CONFIG_NATIVE_CALL)
> +/* Set the library for native bypass  */
> +if (native_bypass_enabled) {

Then this could be:

  if (native_lib && g_file_test(native_lib, G_FILE_TEST_EXISTS)) {

Or maybe better:

 if (native_lib) {
     if (g_file_test(native_lib, G_FILE_TEST_EXISTS)) {
         .. setup ..
     } else {
         fprintf(stderr, "can't open %s\n", native_lib);
         exit(EXIT_FAILURE);
     }
 }

> +GString *lib = g_string_new(native_lib);
> +lib = g_string_prepend(lib, "LD_PRELOAD=");
> +if (envlist_appendenv(envlist, g_string_free(lib, false), ":") != 0) 
> {
> +fprintf(stderr,
> +"failed to append the native library to environment.\n");
> +exit(EXIT_FAILURE);
> +}
> +}
> +#endif
>  target_environ = envlist_to_environ(envlist, NULL);
>  envlist_free(envlist);


-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: qemu-user self emulation broken with default CPU on x86/x64

2023-07-03 Thread Daniel P . Berrangé

On Mon, Jul 03, 2023 at 06:03:08PM +0200, Pierrick Bouvier wrote:
> Hi everyone,
> 
> Recently (in d135f781 [1], between v7.0.0 and v8.0.0), qemu-user default cpu
> was updated to "max" instead of qemu32/qemu64.
> 
> This change "broke" qemu self emulation if this new default cpu is used.
> 
> $ ./qemu-x86_64 ./qemu-x86_64 --version
> qemu-x86_64: ../util/cacheflush.c:212: init_cache_info: Assertion `(isize &
> (isize - 1)) == 0' failed.
> qemu: uncaught target signal 6 (Aborted) - core dumped
> Aborted
> 
> By setting cpu back to qemu64, it works again.
> $ ./qemu-x86_64 -cpu qemu64 ./qemu-x86_64  --version
> qemu-x86_64 version 8.0.50 (v8.0.0-2317-ge125b08ed6)
> Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project developers
> 
> Commenting assert does not work, as qemu aligned malloc fail shortly after.
> 
> I'm willing to fix it, but I'm not sure what is the issue with "max" cpu
> exactly. Is it missing CPU cache line, or something else?
> Any guidance would be welcome.

Yes, it appears that dcache info being reported by the 'max' CPU is
bogus. We can simplify the test case with 'getconf'

With 'max' CPU model:

# getconf -a | grep DCACHE
LEVEL1_DCACHE_SIZE 7273809
LEVEL1_DCACHE_ASSOC 1
LEVEL1_DCACHE_LINESIZE 2697

with 'qemu64' CPU model

# getconf -a | grep DCACHE
LEVEL1_DCACHE_SIZE 0
LEVEL1_DCACHE_ASSOC 0
LEVEL1_DCACHE_LINESIZE 0

OR with Nehalem:

# getconf -a | grep DCACHE
LEVEL1_DCACHE_SIZE 32768
LEVEL1_DCACHE_ASSOC 8
LEVEL1_DCACHE_LINESIZE 64

> 
> I know it's not the most important problem on earth, but it's still
> surprising to meet this when you try to use qemu to emulate itself.
> 
> Regards,
> Pierrick
> 
> [1] 
> https://gitlab.com/qemu-project/qemu/-/commit/d135f781405f7c78153aa65e0327b05a4aa72e50
> 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH] linux-user/syscall: Implement execve without execveat

2023-07-03 Thread Michael Tokarev


03.07.2023 18:48, Pierrick Bouvier пишет:

Support for execveat syscall was implemented in 55bbe4 and is available
since QEMU 8.0.0. It relies on host execveat, which is widely available
on most of Linux kernels today.

However, this change breaks qemu-user self emulation if the "host" qemu
version is older than 8.0.0, because that version does not yet implement
execveat. This unusual use case arises on most distributions today that
have binfmt support.

With a concrete failing example:
$ qemu-x86_64-7.2 qemu-x86_64-8.0 /bin/bash -c /bin/ls
/bin/bash: line 1: /bin/ls: Function not implemented
-> not implemented means execve returned ENOSYS

qemu-user-static 7.2 and 8.0 can be conveniently grabbed from debian
packages qemu-user-static* [1].

One usage of this is running wine-arm64 from linux-x64 (details [2]).
This is by updating qemu embedded in docker image that we ran into this
issue.

The solution to update host qemu is not always possible. Either it's
complicated or ask you to recompile it, or simply is not accessible
(GitLab CI, GitHub Actions). Thus, it could be worth to implement execve
without relying on execveat, which is the goal of this patch.

This patch was tested with example presented in this commit message.

[1] http://ftp.us.debian.org/debian/pool/main/q/qemu/
[2] https://www.linaro.org/blog/emulate-windows-on-arm/

Signed-off-by: Pierrick Bouvier 
---
  linux-user/syscall.c | 45 +---
  1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index f2cb101d83..b64ec3296a 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -659,6 +659,7 @@ safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, 
options, \
  #endif
  safe_syscall5(int, waitid, idtype_t, idtype, id_t, id, siginfo_t *, infop, \
int, options, struct rusage *, rusage)
+safe_syscall3(int, execve, const char *, filename, char **, argv, char **, 
envp)
  safe_syscall5(int, execveat, int, dirfd, const char *, filename,
char **, argv, char **, envp, int, flags)
  #if defined(TARGET_NR_select) || defined(TARGET_NR__newselect) || \
@@ -8520,9 +8521,12 @@ static int do_openat(CPUArchState *cpu_env, int dirfd, 
const char *pathname, int
  return safe_openat(dirfd, path(pathname), flags, mode);
  }
  
-static int do_execveat(CPUArchState *cpu_env, int dirfd,

-   abi_long pathname, abi_long guest_argp,
-   abi_long guest_envp, int flags)
+#define IS_EXECVEAT 0
+#define IS_EXECVE 1
+
+static int do_execv(CPUArchState *cpu_env, int dirfd,
+abi_long pathname, abi_long guest_argp,
+abi_long guest_envp, int flags, bool is_execve)
  {
  int ret;
  char **argp, **envp;
@@ -8601,10 +8605,18 @@ static int do_execveat(CPUArchState *cpu_env, int dirfd,
  goto execve_efault;
  }
  
-if (is_proc_myself(p, "exe")) {

-ret = get_errno(safe_execveat(dirfd, exec_path, argp, envp, flags));
+if (is_execve == IS_EXECVE) {


is_execve is either bool or not. I'd use it as bool, and pass true/false.
Right now it is inconsistent.


+if (is_proc_myself(p, "exe")) {
+ret = get_errno(safe_execve(exec_path, argp, envp));
+} else {
+ret = get_errno(safe_execve(p, argp, envp));
+}
  } else {
-ret = get_errno(safe_execveat(dirfd, p, argp, envp, flags));
+if (is_proc_myself(p, "exe")) {
+ret = get_errno(safe_execveat(dirfd, exec_path, argp, envp, 
flags));
+} else {
+ret = get_errno(safe_execveat(dirfd, p, argp, envp, flags));
+}
  }


And this can be simplified quite a bit by using a condition on
is_proc_myself(p, "exe"):

  if (is_proc_myself(p, "exe")) {
     p = exec_path;
  }
  ret = is_execveat ?
    safe_execveat(dirfd, p, argp, envp, flags) :
    safe_execve(p, argp, envp);
  ret = get_errno(ret);
  ...

I dunno which way Laurent might prefer, but to my taste this way it is
much more readable (give or take the proper coding style to use here, -
I don't remember how the arithmetic if should be styled).

  
  unlock_user(p, pathname, 0);

@@ -8633,6 +8645,25 @@ execve_end:
  return ret;
  }
  
+static int do_execveat(CPUArchState *cpu_env, int dirfd,

+   abi_long pathname, abi_long guest_argp,
+   abi_long guest_envp, int flags)
+{
+return do_execv(cpu_env, dirfd,
+pathname, guest_argp, guest_envp, flags,
+IS_EXECVEAT);
+}
+
+static int do_execve(CPUArchState *cpu_env,
+ abi_long pathname, abi_long guest_argp,
+ abi_long guest_envp)
+{
+return do_execv(cpu_env, AT_FDCWD,
+pathname, guest_argp, guest_envp, 0,
+IS_EXECVE);
+}
+
+
  #define TIMER_MAGIC 0x0caf0000
  #define TIMER_MAGIC_MASK 0xffff0000
  
@@ -9158,7 +9189,7 @@ static abi_long do

Re: [PATCH] vfio: Fix null pointer dereference bug in vfio_bars_finalize()

2023-07-03 Thread Philippe Mathieu-Daudé


On 3/7/23 18:39, Avihai Horon wrote:

vfio_realize() has the following flow:
1. vfio_bars_prepare() -- sets VFIOBAR->size.
2. msix_early_setup().
3. vfio_bars_register() -- allocates VFIOBAR->mr.

After vfio_bars_prepare() is called msix_early_setup() can fail. If it
does fail, vfio_bars_register() is never called and VFIOBAR->mr is not
allocated.

In this case, vfio_bars_finalize() is called as part of the error flow
to free the bars' resources. However, vfio_bars_finalize() calls
object_unparent() for VFIOBAR->mr unconditionally and thus we get a null
pointer dereference.

Fix it by checking VFIOBAR->mr in vfio_bars_finalize().

Fixes: 89d5202edc50 ("vfio/pci: Allow relocating MSI-X MMIO")
Signed-off-by: Avihai Horon 
---
  hw/vfio/pci.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index ab6645ba60..95e077082b 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1752,7 +1752,7 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev)
  
  vfio_bar_quirk_finalize(vdev, i);

  vfio_region_finalize(&bar->region);
-if (bar->size) {
+if (bar->size && bar->mr) {
  object_unparent(OBJECT(bar->mr));
  g_free(bar->mr);
  }


What about:

if (bar->mr) {
assert(bar->size);
object_unparent(OBJECT(bar->mr));
g_free(bar->mr);
bar->mr = NULL;
}

?

Re: [RFC v3 04/10] linux-user: Implement envlist_appendenv

2023-07-03 Thread Warner Losh

On Mon, Jul 3, 2023, 10:40 AM Alex Bennée  wrote:

>
> Yeqi Fu  writes:
>
> > Signed-off-by: Yeqi Fu 
> > ---
> >  include/qemu/envlist.h |  1 +
> >  util/envlist.c | 61 ++
> >  2 files changed, 62 insertions(+)
> >
> > diff --git a/include/qemu/envlist.h b/include/qemu/envlist.h
> > index 6006dfae44..865eb18e17 100644
> > --- a/include/qemu/envlist.h
> > +++ b/include/qemu/envlist.h
> > @@ -7,6 +7,7 @@ envlist_t *envlist_create(void);
> >  void envlist_free(envlist_t *);
> >  int envlist_setenv(envlist_t *, const char *);
> >  int envlist_unsetenv(envlist_t *, const char *);
> > +int envlist_appendenv(envlist_t *, const char *, const char *);
> >  int envlist_parse_set(envlist_t *, const char *);
> >  int envlist_parse_unset(envlist_t *, const char *);
> >  char **envlist_to_environ(const envlist_t *, size_t *);
> > diff --git a/util/envlist.c b/util/envlist.c
> > index db937c0427..635c9c4fab 100644
> > --- a/util/envlist.c
> > +++ b/util/envlist.c
> > @@ -201,6 +201,67 @@ envlist_unsetenv(envlist_t *envlist, const char
> *env)
> >  return (0);
>

No parens here

>  }
> >
> > +/*
> > + * Appends environment value to envlist. If the environment
> > + * variable already exists, the new value is appended to the
> > + * existing one.
> > + *
> > + * Returns 0 in success, errno otherwise.
> > + */
> > +int
> > +envlist_appendenv(envlist_t *envlist, const char *env, const char
> *separator)
> > +{
> > +struct envlist_entry *entry = NULL;
> > +const char *eq_sign;
> > +size_t envname_len;
> > +
> > +if ((envlist == NULL) || (env == NULL) || (separator == NULL)) {
> > +return (EINVAL);
>
> No () around the EINVAL needed here.
>

And elsewhere...

Warner

> +}
> > +
> > +/* find out first equals sign in given env */
> > +eq_sign = strchr(env, '=');
> > +if (eq_sign == NULL) {
> > +return (EINVAL);
> > +}
> > +
> > +if (strchr(eq_sign + 1, '=') != NULL) {
> > +return (EINVAL);
> > +}
> > +
> > +envname_len = eq_sign - env + 1;
> > +
> > +/*
> > + * If there already exists variable with given name,
> > + * we append the new value to the existing one.
> > + */
> > +for (entry = envlist->el_entries.lh_first; entry != NULL;
> > +entry = entry->ev_link.le_next) {
> > +if (strncmp(entry->ev_var, env, envname_len) == 0) {
> > +break;
> > +}
> > +}
> > +
> > +if (entry != NULL) {
> > +char *new_env_value = NULL;
> > +size_t new_env_len = strlen(entry->ev_var) + strlen(eq_sign)
> > ++ strlen(separator) + 1;
> > +new_env_value = g_malloc(new_env_len);
> > +strcpy(new_env_value, entry->ev_var);
> > +strcat(new_env_value, separator);
> > +strcat(new_env_value, eq_sign + 1);
> > +g_free((char *)entry->ev_var);
> > +entry->ev_var = new_env_value;
> > +} else {
> > +envlist->el_count++;
> > +entry = g_malloc(sizeof(*entry));
> > +entry->ev_var = g_strdup(env);
> > +QLIST_INSERT_HEAD(&envlist->el_entries, entry, ev_link);
> > +}
> > +
> > +return (0);
> > +}
> > +
>
> We really should add something to tests/unit/test-env to check the
> various operations work as expected.
>
>
> >  /*
> >   * Returns given envlist as array of strings (in same form that
> >   * global variable environ is).  Caller must free returned memory
>
>
> --
> Alex Bennée
> Virtualisation Tech Lead @ Linaro
>
>

Re: [PATCH RFC 1/1] vdpa: Return -EINVAL if device's ack is VIRTIO_NET_ERR

2023-07-03 Thread Michael S. Tsirkin

On Wed, Jun 14, 2023 at 09:01:47PM +0800, Hawkins Jiawei wrote:
> According to VirtIO standard, "The class, command and
> command-specific-data are set by the driver,
> and the device sets the ack byte.
> There is little it can do except issue a diagnostic
> if ack is not VIRTIO_NET_OK."
> 
> Therefore, QEMU should stop sending the queued SVQ commands and
> cancel the device startup if the device's ack is not VIRTIO_NET_OK.
> 
> Yet the problem is that, vhost_vdpa_net_load_x() returns 1 based on
> `*s->status != VIRTIO_NET_OK` when the device's ack is VIRTIO_NET_ERR.
> As a result, net->nc->info->load() also returns 1, this makes
> vhost_net_start_one() incorrectly assume the device state is
> successfully loaded by vhost_vdpa_net_load() and return 0, instead of
> goto `fail` label to cancel the device startup, as vhost_net_start_one()
> only cancels the device startup when net->nc->info->load() returns a
> negative value.
> 
> This patch fixes this problem by returning -EINVAL when the device's
> ack is not VIRTIO_NET_OK.
> 
> Fixes: f73c0c43ac ("vdpa: extract vhost_vdpa_net_load_mac from 
> vhost_vdpa_net_load")
> Fixes: f64c7cda69 ("vdpa: Add vhost_vdpa_net_load_mq")
> Signed-off-by: Hawkins Jiawei 
> ---
>  net/vhost-vdpa.c | 10 +++---
>  1 file changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 37cdc84562..630c9bf71e 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -651,8 +651,9 @@ static int vhost_vdpa_net_load_mac(VhostVDPAState *s, 
> const VirtIONet *n)
>  if (unlikely(dev_written < 0)) {
>  return dev_written;
>  }
> -
> -return *s->status != VIRTIO_NET_OK;
> +if (*s->status != VIRTIO_NET_OK) {
> +return -EINVAL;
> +}
>  }
>  
>  return 0;
> @@ -676,8 +677,11 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
>  if (unlikely(dev_written < 0)) {
>  return dev_written;
>  }
> +if (*s->status != VIRTIO_NET_OK) {
> +return -EINVAL;
> +}
>  
> -return *s->status != VIRTIO_NET_OK;
> +return 0;
>  }

I think EIO would be better, we have too many EINVAL cases,
making things hard to debug.


>  
>  static int vhost_vdpa_net_load(NetClientState *nc)
> -- 
> 2.25.1

[PATCH] vfio: Fix null pointer dereference bug in vfio_bars_finalize()

2023-07-03 Thread Avihai Horon

vfio_realize() has the following flow:
1. vfio_bars_prepare() -- sets VFIOBAR->size.
2. msix_early_setup().
3. vfio_bars_register() -- allocates VFIOBAR->mr.

After vfio_bars_prepare() is called msix_early_setup() can fail. If it
does fail, vfio_bars_register() is never called and VFIOBAR->mr is not
allocated.

In this case, vfio_bars_finalize() is called as part of the error flow
to free the bars' resources. However, vfio_bars_finalize() calls
object_unparent() for VFIOBAR->mr unconditionally and thus we get a null
pointer dereference.

Fix it by checking VFIOBAR->mr in vfio_bars_finalize().

Fixes: 89d5202edc50 ("vfio/pci: Allow relocating MSI-X MMIO")
Signed-off-by: Avihai Horon 
---
 hw/vfio/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index ab6645ba60..95e077082b 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1752,7 +1752,7 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev)
 
 vfio_bar_quirk_finalize(vdev, i);
 vfio_region_finalize(&bar->region);
-if (bar->size) {
+if (bar->size && bar->mr) {
 object_unparent(OBJECT(bar->mr));
 g_free(bar->mr);
 }
-- 
2.26.3

Re: [RFC v3 04/10] linux-user: Implement envlist_appendenv

2023-07-03 Thread Alex Bennée



Yeqi Fu  writes:

> Signed-off-by: Yeqi Fu 
> ---
>  include/qemu/envlist.h |  1 +
>  util/envlist.c | 61 ++
>  2 files changed, 62 insertions(+)
>
> diff --git a/include/qemu/envlist.h b/include/qemu/envlist.h
> index 6006dfae44..865eb18e17 100644
> --- a/include/qemu/envlist.h
> +++ b/include/qemu/envlist.h
> @@ -7,6 +7,7 @@ envlist_t *envlist_create(void);
>  void envlist_free(envlist_t *);
>  int envlist_setenv(envlist_t *, const char *);
>  int envlist_unsetenv(envlist_t *, const char *);
> +int envlist_appendenv(envlist_t *, const char *, const char *);
>  int envlist_parse_set(envlist_t *, const char *);
>  int envlist_parse_unset(envlist_t *, const char *);
>  char **envlist_to_environ(const envlist_t *, size_t *);
> diff --git a/util/envlist.c b/util/envlist.c
> index db937c0427..635c9c4fab 100644
> --- a/util/envlist.c
> +++ b/util/envlist.c
> @@ -201,6 +201,67 @@ envlist_unsetenv(envlist_t *envlist, const char *env)
>  return (0);
>  }
>  
> +/*
> + * Appends environment value to envlist. If the environment
> + * variable already exists, the new value is appended to the
> + * existing one.
> + *
> + * Returns 0 in success, errno otherwise.
> + */
> +int
> +envlist_appendenv(envlist_t *envlist, const char *env, const char *separator)
> +{
> +struct envlist_entry *entry = NULL;
> +const char *eq_sign;
> +size_t envname_len;
> +
> +if ((envlist == NULL) || (env == NULL) || (separator == NULL)) {
> +return (EINVAL);

No () around the EINVAL needed here.

> +}
> +
> +/* find out first equals sign in given env */
> +eq_sign = strchr(env, '=');
> +if (eq_sign == NULL) {
> +return (EINVAL);
> +}
> +
> +if (strchr(eq_sign + 1, '=') != NULL) {
> +return (EINVAL);
> +}
> +
> +envname_len = eq_sign - env + 1;
> +
> +/*
> + * If there already exists variable with given name,
> + * we append the new value to the existing one.
> + */
> +for (entry = envlist->el_entries.lh_first; entry != NULL;
> +entry = entry->ev_link.le_next) {
> +if (strncmp(entry->ev_var, env, envname_len) == 0) {
> +break;
> +}
> +}
> +
> +if (entry != NULL) {
> +char *new_env_value = NULL;
> +size_t new_env_len = strlen(entry->ev_var) + strlen(eq_sign)
> ++ strlen(separator) + 1;
> +new_env_value = g_malloc(new_env_len);
> +strcpy(new_env_value, entry->ev_var);
> +strcat(new_env_value, separator);
> +strcat(new_env_value, eq_sign + 1);
> +g_free((char *)entry->ev_var);
> +entry->ev_var = new_env_value;
> +} else {
> +envlist->el_count++;
> +entry = g_malloc(sizeof(*entry));
> +entry->ev_var = g_strdup(env);
> +QLIST_INSERT_HEAD(&envlist->el_entries, entry, ev_link);
> +}
> +
> +return (0);
> +}
> +

We really should add something to tests/unit/test-env to check the
various operations work as expected.


>  /*
>   * Returns given envlist as array of strings (in same form that
>   * global variable environ is).  Caller must free returned memory


-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: [PATCH RFC 1/1] vdpa: Return -EINVAL if device's ack is VIRTIO_NET_ERR

2023-07-03 Thread Eugenio Perez Martin

On Wed, Jun 14, 2023 at 3:02 PM Hawkins Jiawei  wrote:
>
> According to VirtIO standard, "The class, command and
> command-specific-data are set by the driver,
> and the device sets the ack byte.
> There is little it can do except issue a diagnostic
> if ack is not VIRTIO_NET_OK."
>
> Therefore, QEMU should stop sending the queued SVQ commands and
> cancel the device startup if the device's ack is not VIRTIO_NET_OK.
>
> Yet the problem is that, vhost_vdpa_net_load_x() returns 1 based on
> `*s->status != VIRTIO_NET_OK` when the device's ack is VIRTIO_NET_ERR.
> As a result, net->nc->info->load() also returns 1, this makes
> vhost_net_start_one() incorrectly assume the device state is
> successfully loaded by vhost_vdpa_net_load() and return 0, instead of
> goto `fail` label to cancel the device startup, as vhost_net_start_one()
> only cancels the device startup when net->nc->info->load() returns a
> negative value.
>
> This patch fixes this problem by returning -EINVAL when the device's
> ack is not VIRTIO_NET_OK.
>
> Fixes: f73c0c43ac ("vdpa: extract vhost_vdpa_net_load_mac from 
> vhost_vdpa_net_load")
> Fixes: f64c7cda69 ("vdpa: Add vhost_vdpa_net_load_mq")
> Signed-off-by: Hawkins Jiawei 

Maybe we could split the fixes? Either way:

Acked-by: Eugenio Pérez 

Thanks!

> ---
>  net/vhost-vdpa.c | 10 +++---
>  1 file changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 37cdc84562..630c9bf71e 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -651,8 +651,9 @@ static int vhost_vdpa_net_load_mac(VhostVDPAState *s, 
> const VirtIONet *n)
>  if (unlikely(dev_written < 0)) {
>  return dev_written;
>  }
> -
> -return *s->status != VIRTIO_NET_OK;
> +if (*s->status != VIRTIO_NET_OK) {
> +return -EINVAL;
> +}
>  }
>
>  return 0;
> @@ -676,8 +677,11 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
>  if (unlikely(dev_written < 0)) {
>  return dev_written;
>  }
> +if (*s->status != VIRTIO_NET_OK) {
> +return -EINVAL;
> +}
>
> -return *s->status != VIRTIO_NET_OK;
> +return 0;
>  }
>
>  static int vhost_vdpa_net_load(NetClientState *nc)
> --
> 2.25.1
>

Re: [RFC v3 03/10] build: Implement libnative library and configure options

2023-07-03 Thread Alex Bennée



Yeqi Fu  writes:

> This commit implements a shared library, where native functions are
> rewritten as specialized instructions. At runtime, user programs load
> the shared library, and specialized instructions are executed when
> native functions are called.

This commit breaks the build:

  make -j9 all
  config-host.mak is out-of-date, running configure
  python determined to be '/usr/bin/python3'
  python version: Python 3.11.2
  mkvenv: Creating non-isolated virtual environment at 'pyvenv'
  mkvenv: checking for meson>=0.63.0
  The Meson build system
  Version: 1.0.1
  Source dir: /home/alex/lsrc/qemu.git
  Build dir: /home/alex/lsrc/qemu.git/builds/user
  Build type: native build
  Project name: qemu
  Project version: 8.0.50
  C compiler for the host machine: cc -m64 -mcx16 (gcc 12.2.0 "cc (Debian 
12.2.0-14) 12.2.0")
  C linker for the host machine: cc -m64 -mcx16 ld.bfd 2.40
  Host machine cpu family: x86_64
  Host machine cpu: x86_64
  Program scripts/symlink-install-tree.py found: YES 
(/home/alex/lsrc/qemu.git/builds/user/pyvenv/bin/python3 
/home/alex/lsrc/qemu.git/scripts/symlink-install-tree.py)
  Program sh found: YES (/usr/bin/sh)
  C++ compiler for the host machine: c++ -m64 -mcx16 (gcc 12.2.0 "c++ (Debian 
12.2.0-14) 12.2.0")
  C++ linker for the host machine: c++ -m64 -mcx16 ld.bfd 2.40

  ../../meson.build:68:0: ERROR: Key TARGET_DIRS is not in the dictionary.

  A full log can be found at 
/home/alex/lsrc/qemu.git/builds/user/meson-logs/meson-log.txt

  ERROR: meson setup failed

  make: *** [Makefile:83: config-host.mak] Error 1

  Compilation exited abnormally with code 2 at Mon Jul  3 16:29:43

I would split this into a number of separate commits.

  - configure logic to move around and share the config.mk
  - add the build machinery for libnative with a default WRAP_NATIVE
which asserts()
  - merge the per-arch macros with arch enabling patch

>
> Signed-off-by: Yeqi Fu 
> ---
>  Makefile|   2 +
>  common-user/native/Makefile.include |   9 +++
>  common-user/native/Makefile.target  |  26 +++
>  common-user/native/libnative.c  | 112 
>  configure   |  84 -
>  include/native/libnative.h  |  12 +++
>  include/native/native-defs.h|  65 
>  7 files changed, 293 insertions(+), 17 deletions(-)
>  create mode 100644 common-user/native/Makefile.include
>  create mode 100644 common-user/native/Makefile.target
>  create mode 100644 common-user/native/libnative.c
>  create mode 100644 include/native/libnative.h
>  create mode 100644 include/native/native-defs.h
>
> diff --git a/Makefile b/Makefile
> index 3c7d67142f..787b8954a6 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -185,6 +185,8 @@ SUBDIR_MAKEFLAGS=$(if $(V),,--no-print-directory --quiet)
>  
>  include $(SRC_PATH)/tests/Makefile.include
>  
> +include $(SRC_PATH)/common-user/native/Makefile.include
> +
>  all: recurse-all
>  
>  ROMS_RULES=$(foreach t, all clean distclean, $(addsuffix /$(t), $(ROMS)))
> diff --git a/common-user/native/Makefile.include 
> b/common-user/native/Makefile.include
> new file mode 100644
> index 00..40d20bcd4c
> --- /dev/null
> +++ b/common-user/native/Makefile.include
> @@ -0,0 +1,9 @@
> +.PHONY: build-native
> +build-native: $(NATIVE_TARGETS:%=build-native-library-%)
> +$(NATIVE_TARGETS:%=build-native-library-%): build-native-library-%:
> + $(call quiet-command, \
> + $(MAKE) -C common-user/native/$* $(SUBDIR_MAKEFLAGS), \
> + "BUILD","$* native library")
> +# endif
> +
> +all: build-native
> diff --git a/common-user/native/Makefile.target 
> b/common-user/native/Makefile.target
> new file mode 100644
> index 00..1bb468a2ec
> --- /dev/null
> +++ b/common-user/native/Makefile.target
> @@ -0,0 +1,26 @@
> +# -*- Mode: makefile -*-
> +#
> +# Library for native calls 
> +#
> +
> +all:
> +-include ../config-host.mak
> +-include config-target.mak
> +
> +CFLAGS+=-O1 -fPIC -shared -fno-stack-protector -I$(SRC_PATH)/include 
> -D$(TARGET_NAME) 
> +LDFLAGS+=
> +
> +ifeq ($(TARGET_NAME),arm)
> +EXTRA_CFLAGS+=-marm
> +endif

I think I mentioned before that this is something that the configure
script should add to config-target.mak.

> +
> +SRC = $(SRC_PATH)/common-user/native/libnative.c
> +LIBNATIVE = libnative.so
> +
> +all: $(LIBNATIVE)
> +
> +$(LIBNATIVE): $(SRC)
> + $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
> +
> +clean:
> + rm -f $(LIBNATIVE)
> diff --git a/common-user/native/libnative.c b/common-user/native/libnative.c
> new file mode 100644
> index 00..26a004e3b4
> --- /dev/null
> +++ b/common-user/native/libnative.c
> @@ -0,0 +1,112 @@
> +#include 
> +#include 
> +#include 
> +
> +#include "native/libnative.h"
> +#include "native/native-defs.h"
> +
> +#if defined(i386) || defined(x86_64)
> +/* unused opcode */
> +#define WRAP_NATIVE_CALL(func_id, abi_map)  \
> +do {

Re: [PATCH v4 00/10] tests/qtest: make migration-test massively faster

2023-07-03 Thread Thomas Huth


On 01/06/2023 18.13, Daniel P. Berrangé wrote:

This makes migration-test faster by observing that most of the pre-copy
tests don't need to be doing a live migration. They get sufficient code
coverage with the guest CPUs paused.

On my machine this cuts the overall execution time of migration-test
from 13 minutes, down to 8 minutes, without sacrificing any noticeable
code coverage.

Of the tests which do still run in live mode, some need to guarantee
a certain number of iterations. This is achieved by running the 1
iteration with an incredibly small bandwidth and max downtime to
prevent convergence, and watching query-migrate for the reported
iteration to increment. This guarantees that all the tests take at
least 30 seconds to run per iteration required.

Watching for the iteration counter to flip is inefficient and not
actually needed, except on the final iteration before starting
convergence. On this final iteration we merely need to prove that
some amount of already transferred data has been made dirty again.
This in turn will guarantee that a further iteration is required
beyond the current one. This proof is easy to achieve by monitoring
the values at two distinct addresses in guest RAM, and can cut the
30 second duration down to 1 second for one of the iterations.

After this second optimization the runtime is reduced from
8 minutes, down to 1 minute 40 seconds, which is pretty decent given
the amount of coverage we're getting.


It's now ~1 week until the soft freeze, and the migration test still runs for 
~8 minutes. This is still quite annoying. Could we please get one of the 
solutions merged before the soft freeze, either Daniel's or Peter's?


 Thanks,
  Thomas

Re: [PATCH v2 1/2] linux-headers: Update with vfio_ap IRQ index mapping

2023-07-03 Thread Cédric Le Goater


On 6/2/23 16:11, Tony Krowiak wrote:

Note: This is a placeholder patch that includes unmerged uapi changes.

Signed-off-by: Tony Krowiak 
Link: 
https://lore.kernel.org/qemu-devel/20230530225544.280031-1-akrow...@linux.ibm.com/


I am preparing a vfio-next tree including these changes plus a linux-headers
update. I am just waiting for the 6.5-rc1 tag to be pushed.

Thanks,

C.


---
  include/standard-headers/linux/const.h|  2 +-
  include/standard-headers/linux/virtio_blk.h   | 18 +++
  .../standard-headers/linux/virtio_config.h|  6 +++
  include/standard-headers/linux/virtio_net.h   |  1 +
  linux-headers/asm-arm64/kvm.h | 33 
  linux-headers/asm-riscv/kvm.h | 53 ++-
  linux-headers/asm-riscv/unistd.h  |  9 
  linux-headers/asm-s390/unistd_32.h|  1 +
  linux-headers/asm-s390/unistd_64.h|  1 +
  linux-headers/asm-x86/kvm.h   |  3 ++
  linux-headers/linux/const.h   |  2 +-
  linux-headers/linux/kvm.h | 12 +++--
  linux-headers/linux/psp-sev.h |  7 +++
  linux-headers/linux/userfaultfd.h | 17 +-
  linux-headers/linux/vfio.h|  9 
  15 files changed, 158 insertions(+), 16 deletions(-)

diff --git a/include/standard-headers/linux/const.h 
b/include/standard-headers/linux/const.h
index 5e4898725168..1eb84b5087f8 100644
--- a/include/standard-headers/linux/const.h
+++ b/include/standard-headers/linux/const.h
@@ -28,7 +28,7 @@
  #define _BITUL(x) (_UL(1) << (x))
  #define _BITULL(x)(_ULL(1) << (x))
  
-#define __ALIGN_KERNEL(x, a)		__ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)

+#define __ALIGN_KERNEL(x, a)   __ALIGN_KERNEL_MASK(x, 
(__typeof__(x))(a) - 1)
  #define __ALIGN_KERNEL_MASK(x, mask)  (((x) + (mask)) & ~(mask))
  
  #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

diff --git a/include/standard-headers/linux/virtio_blk.h 
b/include/standard-headers/linux/virtio_blk.h
index 7155b1a4701b..d7be3cf5e42f 100644
--- a/include/standard-headers/linux/virtio_blk.h
+++ b/include/standard-headers/linux/virtio_blk.h
@@ -138,11 +138,11 @@ struct virtio_blk_config {
  
  	/* Zoned block device characteristics (if VIRTIO_BLK_F_ZONED) */

struct virtio_blk_zoned_characteristics {
-   uint32_t zone_sectors;
-   uint32_t max_open_zones;
-   uint32_t max_active_zones;
-   uint32_t max_append_sectors;
-   uint32_t write_granularity;
+   __virtio32 zone_sectors;
+   __virtio32 max_open_zones;
+   __virtio32 max_active_zones;
+   __virtio32 max_append_sectors;
+   __virtio32 write_granularity;
uint8_t model;
uint8_t unused2[3];
} zoned;
@@ -239,11 +239,11 @@ struct virtio_blk_outhdr {
   */
  struct virtio_blk_zone_descriptor {
/* Zone capacity */
-   uint64_t z_cap;
+   __virtio64 z_cap;
/* The starting sector of the zone */
-   uint64_t z_start;
+   __virtio64 z_start;
/* Zone write pointer position in sectors */
-   uint64_t z_wp;
+   __virtio64 z_wp;
/* Zone type */
uint8_t z_type;
/* Zone state */
@@ -252,7 +252,7 @@ struct virtio_blk_zone_descriptor {
  };
  
  struct virtio_blk_zone_report {

-   uint64_t nr_zones;
+   __virtio64 nr_zones;
uint8_t reserved[56];
struct virtio_blk_zone_descriptor zones[];
  };
diff --git a/include/standard-headers/linux/virtio_config.h 
b/include/standard-headers/linux/virtio_config.h
index 965ee6ae237e..8a7d0dc8b007 100644
--- a/include/standard-headers/linux/virtio_config.h
+++ b/include/standard-headers/linux/virtio_config.h
@@ -97,6 +97,12 @@
   */
  #define VIRTIO_F_SR_IOV   37
  
+/*

+ * This feature indicates that the driver passes extra data (besides
+ * identifying the virtqueue) in its device notifications.
+ */
+#define VIRTIO_F_NOTIFICATION_DATA 38
+
  /*
   * This feature indicates that the driver can reset a queue individually.
   */
diff --git a/include/standard-headers/linux/virtio_net.h 
b/include/standard-headers/linux/virtio_net.h
index c0e797067aae..2325485f2ca8 100644
--- a/include/standard-headers/linux/virtio_net.h
+++ b/include/standard-headers/linux/virtio_net.h
@@ -61,6 +61,7 @@
  #define VIRTIO_NET_F_GUEST_USO6   55  /* Guest can handle USOv6 in. */
  #define VIRTIO_NET_F_HOST_USO 56  /* Host can handle USO in. */
  #define VIRTIO_NET_F_HASH_REPORT  57  /* Supports hash report */
+#define VIRTIO_NET_F_GUEST_HDRLEN  59  /* Guest provides the exact hdr_len 
value. */
  #define VIRTIO_NET_F_RSS60/* Supports RSS RX steering */
  #define VIRTIO_NET_F_RSC_EXT61/* extended coalescing info */
  #define VIRTIO_NET_F_STANDBY62/* Act as standby for another device
diff --git a/linux-headers/asm-arm64/kv

Re: [RFC PATCH 1/4] linux-headers: Update for vfio capability reporting AtomicOps

2023-07-03 Thread Cédric Le Goater


On 5/19/23 23:57, Alex Williamson wrote:

This is a partial linux-headers update for illustrative and testing
purposes only, NOT FOR COMMIT.

Signed-off-by: Alex Williamson 
---

I am preparing a vfio-next tree including these changes plus a linux-headers
update. I am just waiting for the 6.5-rc1 tag to be pushed.

Thanks,

C.


  linux-headers/linux/vfio.h | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 4a534edbdcba..443a8851e156 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -240,6 +240,20 @@ struct vfio_device_info {
  #define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL3
  #define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP4
  
+/*

+ * The following VFIO_DEVICE_INFO capability reports support for PCIe AtomicOp
+ * completion to the root bus with supported widths provided via flags.
+ */
+#define VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP   5
+struct vfio_device_info_cap_pci_atomic_comp {
+   struct vfio_info_cap_header header;
+   __u32 flags;
+#define VFIO_PCI_ATOMIC_COMP32 (1 << 0)
+#define VFIO_PCI_ATOMIC_COMP64 (1 << 1)
+#define VFIO_PCI_ATOMIC_COMP128(1 << 2)
+   __u32 reserved;
+};
+
  /**
   * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
   *   struct vfio_region_info)

[PATCH] Fix SEGFAULT on getting physical address of MMIO region.

2023-07-03 Thread Mikhail Tyutin

The fix is to clear TLB_INVALID_MASK bit in tlb_addr, as it happens in 
other places e.g. load_helper().


Signed-off-by: Dmitriy Solovev 
Signed-off-by: Mikhail Tyutin 
---
 accel/tcg/cputlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 5b51eff5a4..9045b6330a 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1708,7 +1708,7 @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, 
int mmu_idx,

 uintptr_t index = tlb_index(env, mmu_idx, addr);
 uint64_t tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
-if (likely(tlb_hit(tlb_addr, addr))) {
+if (likely(tlb_hit(tlb_addr & ~TLB_INVALID_MASK, addr))) {
 /* We must have an iotlb entry for MMIO */
 if (tlb_addr & TLB_MMIO) {
 CPUTLBEntryFull *full;
--
2.34.1

qemu-user self emulation broken with default CPU on x86/x64

2023-07-03 Thread Pierrick Bouvier


Hi everyone,

Recently (in d135f781 [1], between v7.0.0 and v8.0.0), qemu-user default 
cpu was updated to "max" instead of qemu32/qemu64.


This change "broke" qemu self emulation if this new default cpu is used.

$ ./qemu-x86_64 ./qemu-x86_64 --version
qemu-x86_64: ../util/cacheflush.c:212: init_cache_info: Assertion 
`(isize & (isize - 1)) == 0' failed.

qemu: uncaught target signal 6 (Aborted) - core dumped
Aborted

By setting cpu back to qemu64, it works again.
$ ./qemu-x86_64 -cpu qemu64 ./qemu-x86_64  --version
qemu-x86_64 version 8.0.50 (v8.0.0-2317-ge125b08ed6)
Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project developers

Commenting out the assert does not work, as qemu's aligned malloc fails shortly after.

I'm willing to fix it, but I'm not sure what the issue with the "max" cpu 
is exactly. Is it missing CPU cache line, or something else?

Any guidance would be welcome.

I know it's not the most important problem on earth, but it's still 
surprising to meet this when you try to use qemu to emulate itself.


Regards,
Pierrick

[1] 
https://gitlab.com/qemu-project/qemu/-/commit/d135f781405f7c78153aa65e0327b05a4aa72e50

Re: [PATCH 05/24] linux-user: Split TARGET_PROT_* out of syscall_defs.h

2023-07-03 Thread Philippe Mathieu-Daudé


On 30/6/23 15:21, Richard Henderson wrote:

Move the values into the per-target target_mman.h headers

Signed-off-by: Richard Henderson 
---
  linux-user/aarch64/target_mman.h |  3 +++
  linux-user/generic/target_mman.h |  4 
  linux-user/mips/target_mman.h|  2 ++
  linux-user/syscall_defs.h| 11 ---
  linux-user/xtensa/target_mman.h  |  2 ++
  5 files changed, 11 insertions(+), 11 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

[PATCH 07/12] target/s390x: Fix relative long instructions with large offsets

2023-07-03 Thread Ilya Leoshkevich

The expression "imm * 2" in gen_ri2() can wrap around if imm is large
enough.

Fix by casting imm to int64_t, like it's done in disas_jdest().

Fixes: e8ecdfeb30f0 ("Fix EXECUTE of relative branches")
Signed-off-by: Ilya Leoshkevich 
---
 target/s390x/tcg/translate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index a6079ab7b4f..6661b27efa4 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -5794,7 +5794,7 @@ static TCGv gen_ri2(DisasContext *s)
 
 disas_jdest(s, i2, is_imm, imm, ri2);
 if (is_imm) {
-ri2 = tcg_constant_i64(s->base.pc_next + imm * 2);
+ri2 = tcg_constant_i64(s->base.pc_next + (int64_t)imm * 2);
 }
 
 return ri2;
-- 
2.41.0

[PATCH 03/12] target/s390x: Fix MDEB and MDEBR

2023-07-03 Thread Ilya Leoshkevich

These instructions multiply 32 bits by 32 bits, not 32 bits by 64 bits.

Fixes: 83b00736f3d8 ("target-s390: Convert FP MULTIPLY")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Ilya Leoshkevich 
---
 target/s390x/tcg/fpu_helper.c| 3 ++-
 target/s390x/tcg/insn-data.h.inc | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c
index 57e58292833..4b7fa58af3e 100644
--- a/target/s390x/tcg/fpu_helper.c
+++ b/target/s390x/tcg/fpu_helper.c
@@ -306,8 +306,9 @@ uint64_t HELPER(mdb)(CPUS390XState *env, uint64_t f1, 
uint64_t f2)
 /* 64/32-bit FP multiplication */
 uint64_t HELPER(mdeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
+float64 f1_64 = float32_to_float64(f1, &env->fpu_status);
 float64 ret = float32_to_float64(f2, &env->fpu_status);
-ret = float64_mul(f1, ret, &env->fpu_status);
+ret = float64_mul(f1_64, ret, &env->fpu_status);
 handle_exceptions(env, false, GETPC());
 return ret;
 }
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
index 0a45dbbcda8..457ed25d2fa 100644
--- a/target/s390x/tcg/insn-data.h.inc
+++ b/target/s390x/tcg/insn-data.h.inc
@@ -667,11 +667,11 @@
 F(0xb317, MEEBR,   RRE,   Z,   e1, e2, new, e1, meeb, 0, IF_BFP)
 F(0xb31c, MDBR,RRE,   Z,   f1, f2, new, f1, mdb, 0, IF_BFP)
 F(0xb34c, MXBR,RRE,   Z,   x1, x2, new_x, x1, mxb, 0, IF_BFP)
-F(0xb30c, MDEBR,   RRE,   Z,   f1, e2, new, f1, mdeb, 0, IF_BFP)
+F(0xb30c, MDEBR,   RRE,   Z,   e1, e2, new, f1, mdeb, 0, IF_BFP)
 F(0xb307, MXDBR,   RRE,   Z,   f1, f2, new_x, x1, mxdb, 0, IF_BFP)
 F(0xed17, MEEB,RXE,   Z,   e1, m2_32u, new, e1, meeb, 0, IF_BFP)
 F(0xed1c, MDB, RXE,   Z,   f1, m2_64, new, f1, mdb, 0, IF_BFP)
-F(0xed0c, MDEB,RXE,   Z,   f1, m2_32u, new, f1, mdeb, 0, IF_BFP)
+F(0xed0c, MDEB,RXE,   Z,   e1, m2_32u, new, f1, mdeb, 0, IF_BFP)
 F(0xed07, MXDB,RXE,   Z,   f1, m2_64, new_x, x1, mxdb, 0, IF_BFP)
 /* MULTIPLY HALFWORD */
 C(0x4c00, MH,  RX_a,  Z,   r1_o, m2_16s, new, r1_32, mul, 0)
-- 
2.41.0

[PATCH 08/12] tests/tcg/s390x: Test EPSW

2023-07-03 Thread Ilya Leoshkevich

Add a small test to prevent regressions.

Signed-off-by: Ilya Leoshkevich 
---
 tests/tcg/s390x/Makefile.target |  1 +
 tests/tcg/s390x/epsw.c  | 23 +++
 2 files changed, 24 insertions(+)
 create mode 100644 tests/tcg/s390x/epsw.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 85abfbb98c0..2ef22c88d95 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -36,6 +36,7 @@ TESTS+=rxsbg
 TESTS+=ex-relative-long
 TESTS+=ex-branch
 TESTS+=mxdb
+TESTS+=epsw
 
 cdsg: CFLAGS+=-pthread
 cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/epsw.c b/tests/tcg/s390x/epsw.c
new file mode 100644
index 000..affb1a5e3a1
--- /dev/null
+++ b/tests/tcg/s390x/epsw.c
@@ -0,0 +1,23 @@
+/*
+ * Test the EPSW instruction.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <assert.h>
+#include <stdlib.h>
+
+int main(void)
+{
+unsigned long r1 = 0x1234567887654321UL, r2 = 0x8765432112345678UL;
+
+asm("cr %[r1],%[r2]\n"  /* cc = 1 */
+"epsw %[r1],%[r2]"
+: [r1] "+r" (r1), [r2] "+r" (r2) : : "cc");
+
+/* Do not check the R and RI bits. */
+r1 &= ~0x40000008UL;
+assert(r1 == 0x1234567807051001UL);
+assert(r2 == 0x8765432180000000UL);
+
+return EXIT_SUCCESS;
+}
-- 
2.41.0

[PATCH 04/12] target/s390x: Fix MVCRL with a large value in R0

2023-07-03 Thread Ilya Leoshkevich

Using a large R0 causes an assertion error:

qemu-s390x: target/s390x/tcg/mem_helper.c:183: access_prepare_nf: Assertion 
`size > 0 && size <= 4096' failed.

Even though PoP explicitly advises against using more than 8 bits for the
size, an emulator crash is never a good thing.

Fix by truncating the size to 8 bits.

Fixes: ea0a1053e276 ("s390x/tcg: Implement Miscellaneous-Instruction-Extensions 
Facility 3 for the s390x")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Ilya Leoshkevich 
---
 target/s390x/tcg/mem_helper.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index d02ec861d8b..84ad85212c9 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -514,6 +514,7 @@ void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t 
dest, uint64_t src)
 int32_t i;
 
 /* MVCRL always copies one more byte than specified - maximum is 256 */
+l &= 0xff;
 l++;
 
 access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
-- 
2.41.0

[PATCH 09/12] tests/tcg/s390x: Test LARL with a large offset

2023-07-03 Thread Ilya Leoshkevich

Add a small test to prevent regressions.

Signed-off-by: Ilya Leoshkevich 
---
 tests/tcg/s390x/Makefile.target |  1 +
 tests/tcg/s390x/larl.c  | 17 +
 2 files changed, 18 insertions(+)
 create mode 100644 tests/tcg/s390x/larl.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 2ef22c88d95..dbf64c991e9 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -37,6 +37,7 @@ TESTS+=ex-relative-long
 TESTS+=ex-branch
 TESTS+=mxdb
 TESTS+=epsw
+TESTS+=larl
 
 cdsg: CFLAGS+=-pthread
 cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/larl.c b/tests/tcg/s390x/larl.c
new file mode 100644
index 000..b9ced99a023
--- /dev/null
+++ b/tests/tcg/s390x/larl.c
@@ -0,0 +1,17 @@
+/*
+ * Test the LARL instruction.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <stdlib.h>
+
+int main(void)
+{
+long algfi = (long)main;
+long larl;
+
+asm("algfi %[r],0xd0000000" : [r] "+r" (algfi) : : "cc");
+asm("larl %[r],main+0xd0000000" : [r] "=r" (larl));
+
+return algfi == larl ? EXIT_SUCCESS : EXIT_FAILURE;
+}
-- 
2.41.0

[PATCH 05/12] target/s390x: Fix LRA overwriting the top 32 bits on DAT error

2023-07-03 Thread Ilya Leoshkevich

When a DAT error occurs, LRA is supposed to write the error information
to the bottom 32 bits of R1, and leave the top 32 bits of R1 alone.

Fix by passing the original value of R1 into helper and copying the
top 32 bits to the return value.

Fixes: d8fe4a9c284f ("target-s390: Convert LRA")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Ilya Leoshkevich 
---
 target/s390x/helper.h | 2 +-
 target/s390x/tcg/mem_helper.c | 4 ++--
 target/s390x/tcg/translate.c  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 6bc01df73d7..05102578fc9 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -355,7 +355,7 @@ DEF_HELPER_FLAGS_4(idte, TCG_CALL_NO_RWG, void, env, i64, 
i64, i32)
 DEF_HELPER_FLAGS_4(ipte, TCG_CALL_NO_RWG, void, env, i64, i64, i32)
 DEF_HELPER_FLAGS_1(ptlb, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_1(purge, TCG_CALL_NO_RWG, void, env)
-DEF_HELPER_2(lra, i64, env, i64)
+DEF_HELPER_3(lra, i64, env, i64, i64)
 DEF_HELPER_1(per_check_exception, void, env)
 DEF_HELPER_FLAGS_3(per_branch, TCG_CALL_NO_RWG, void, env, i64, i64)
 DEF_HELPER_FLAGS_2(per_ifetch, TCG_CALL_NO_RWG, void, env, i64)
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index 84ad85212c9..94d93d7ea78 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -2356,7 +2356,7 @@ void HELPER(purge)(CPUS390XState *env)
 }
 
 /* load real address */
-uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
+uint64_t HELPER(lra)(CPUS390XState *env, uint64_t r1, uint64_t addr)
 {
 uint64_t asc = env->psw.mask & PSW_MASK_ASC;
 uint64_t ret, tec;
@@ -2370,7 +2370,7 @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
 exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
 if (exc) {
 cc = 3;
-ret = exc | 0x80000000;
+ret = (r1 & 0xffffffff00000000) | exc | 0x80000000;
 } else {
 cc = 0;
 ret |= addr & ~TARGET_PAGE_MASK;
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 0cef6efbef4..a6079ab7b4f 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -2932,7 +2932,7 @@ static DisasJumpType op_lctlg(DisasContext *s, DisasOps 
*o)
 
 static DisasJumpType op_lra(DisasContext *s, DisasOps *o)
 {
-gen_helper_lra(o->out, cpu_env, o->in2);
+gen_helper_lra(o->out, cpu_env, o->out, o->in2);
 set_cc_static(s);
 return DISAS_NEXT;
 }
-- 
2.41.0

[PATCH 12/12] tests/tcg/s390x: Test MVCRL with a large value in R0

2023-07-03 Thread Ilya Leoshkevich

Add a small test to prevent regressions.

Signed-off-by: Ilya Leoshkevich 
---
 tests/tcg/s390x/mie3-mvcrl.c | 46 
 1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c
index 93c7b0a2903..ec78dd1d493 100644
--- a/tests/tcg/s390x/mie3-mvcrl.c
+++ b/tests/tcg/s390x/mie3-mvcrl.c
@@ -1,29 +1,55 @@
+#include 
 #include 
+#include 
 #include 
 
-
-static inline void mvcrl_8(const char *dst, const char *src)
+static void mvcrl(const char *dst, const char *src, size_t len)
 {
+register long r0 asm("r0") = len;
+
 asm volatile (
-"llill %%r0, 8\n"
 ".insn sse, 0xE50A, 0(%[dst]), 0(%[src])"
-: : [dst] "d" (dst), [src] "d" (src)
-: "r0", "memory");
+: : [dst] "d" (dst), [src] "d" (src), "r" (r0)
+: "memory");
 }
 
-
-int main(int argc, char *argv[])
+static bool test(void)
 {
 const char *alpha = "abcdefghijklmnop";
 
 /* array missing 'i' */
-char tstr[17] = "abcdefghjklmnop\0" ;
+char tstr[17] = "abcdefghjklmnop\0";
 
 /* mvcrl reference use: 'open a hole in an array' */
-mvcrl_8(tstr + 9, tstr + 8);
+mvcrl(tstr + 9, tstr + 8, 8);
 
 /* place missing 'i' */
 tstr[8] = 'i';
 
-return strncmp(alpha, tstr, 16ul);
+return strncmp(alpha, tstr, 16ul) == 0;
+}
+
+static bool test_bad_r0(void)
+{
+char src[256];
+
+/*
+ * PoP says: Bits 32-55 of general register 0 should contain zeros;
+ * otherwise, the program may not operate compatibly in the future.
+ *
+ * Try it anyway in order to check whether this would crash QEMU itself.
+ */
+mvcrl(src, src, (size_t)-1);
+
+return true;
+}
+
+int main(void)
+{
+bool ok = true;
+
+ok &= test();
+ok &= test_bad_r0();
+
+return ok ? EXIT_SUCCESS : EXIT_FAILURE;
 }
-- 
2.41.0

[PATCH 00/12] target/s390x: Miscellaneous TCG fixes

2023-07-03 Thread Ilya Leoshkevich

Hi,

Randomized testing found a number of issues in the s390x emulation.
This series fixes 6 of them (patches 2-7) and adds tests (patches
8-12); patch 1 is a cosmetic improvement needed for the EPSW test.

There are more issues, but I thought it would be better to send this
batch now.

Best regards,
Ilya

Ilya Leoshkevich (12):
  linux-user: elfload: Add more initial s390x PSW bits
  target/s390x: Fix EPSW CC reporting
  target/s390x: Fix MDEB and MDEBR
  target/s390x: Fix MVCRL with a large value in R0
  target/s390x: Fix LRA overwriting the top 32 bits on DAT error
  target/s390x: Fix LRA when DAT is off
  target/s390x: Fix relative long instructions with large offsets
  tests/tcg/s390x: Test EPSW
  tests/tcg/s390x: Test LARL with a large offset
  tests/tcg/s390x: Test LRA
  tests/tcg/s390x: Test MDEB and MDEBR
  tests/tcg/s390x: Test MVCRL with a large value in R0

 linux-user/elfload.c|  4 ++-
 target/s390x/helper.h   |  2 +-
 target/s390x/mmu_helper.c   |  2 +-
 target/s390x/tcg/fpu_helper.c   |  3 +-
 target/s390x/tcg/insn-data.h.inc|  4 +--
 target/s390x/tcg/mem_helper.c   |  5 +--
 target/s390x/tcg/translate.c|  8 +++--
 tests/tcg/s390x/Makefile.softmmu-target |  1 +
 tests/tcg/s390x/Makefile.target |  3 ++
 tests/tcg/s390x/epsw.c  | 23 +
 tests/tcg/s390x/larl.c  | 17 +
 tests/tcg/s390x/lra.S   | 19 ++
 tests/tcg/s390x/mdeb.c  | 30 
 tests/tcg/s390x/mie3-mvcrl.c| 46 +++--
 14 files changed, 147 insertions(+), 20 deletions(-)
 create mode 100644 tests/tcg/s390x/epsw.c
 create mode 100644 tests/tcg/s390x/larl.c
 create mode 100644 tests/tcg/s390x/lra.S
 create mode 100644 tests/tcg/s390x/mdeb.c

-- 
2.41.0

[PATCH 02/12] target/s390x: Fix EPSW CC reporting

2023-07-03 Thread Ilya Leoshkevich

EPSW should explicitly calculate and insert CC, like IPM does.

Fixes: e30a9d3fea58 ("target-s390: Implement EPSW")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Ilya Leoshkevich 
---
 target/s390x/tcg/translate.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index a6ee2d44234..0cef6efbef4 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -2383,10 +2383,14 @@ static DisasJumpType op_epsw(DisasContext *s, DisasOps 
*o)
 int r1 = get_field(s, r1);
 int r2 = get_field(s, r2);
 TCGv_i64 t = tcg_temp_new_i64();
+TCGv_i64 t_cc = tcg_temp_new_i64();
 
 /* Note the "subsequently" in the PoO, which implies a defined result
if r1 == r2.  Thus we cannot defer these writes to an output hook.  */
+gen_op_calc_cc(s);
+tcg_gen_extu_i32_i64(t_cc, cc_op);
 tcg_gen_shri_i64(t, psw_mask, 32);
+tcg_gen_deposit_i64(t, t, t_cc, 12, 2);
 store_reg32_i64(r1, t);
 if (r2 != 0) {
 store_reg32_i64(r2, psw_mask);
-- 
2.41.0

[PATCH 01/12] linux-user: elfload: Add more initial s390x PSW bits

2023-07-03 Thread Ilya Leoshkevich

Make the PSW look more similar to the real s390x userspace PSW.
Except for being there, the newly added bits should not affect the
userspace code execution.

Signed-off-by: Ilya Leoshkevich 
---
 linux-user/elfload.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 6900974c373..7935110bff4 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1635,7 +1635,9 @@ const char *elf_hwcap_str(uint32_t bit)
 static inline void init_thread(struct target_pt_regs *regs, struct image_info 
*infop)
 {
 regs->psw.addr = infop->entry;
-regs->psw.mask = PSW_MASK_64 | PSW_MASK_32;
+regs->psw.mask = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \
+ PSW_MASK_MCHECK | PSW_MASK_PSTATE | PSW_MASK_64 | \
+ PSW_MASK_32;
 regs->gprs[15] = infop->start_stack;
 }
 
-- 
2.41.0

[PATCH 11/12] tests/tcg/s390x: Test MDEB and MDEBR

2023-07-03 Thread Ilya Leoshkevich

Add a small test to prevent regressions.

Signed-off-by: Ilya Leoshkevich 
---
 tests/tcg/s390x/Makefile.target |  1 +
 tests/tcg/s390x/mdeb.c  | 30 ++
 2 files changed, 31 insertions(+)
 create mode 100644 tests/tcg/s390x/mdeb.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index dbf64c991e9..19fbbc6e531 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -38,6 +38,7 @@ TESTS+=ex-branch
 TESTS+=mxdb
 TESTS+=epsw
 TESTS+=larl
+TESTS+=mdeb
 
 cdsg: CFLAGS+=-pthread
 cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/mdeb.c b/tests/tcg/s390x/mdeb.c
new file mode 100644
index 000..4897d28069f
--- /dev/null
+++ b/tests/tcg/s390x/mdeb.c
@@ -0,0 +1,30 @@
+/*
+ * Test the MDEB and MDEBR instructions.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <assert.h>
+#include <stdlib.h>
+
+int main(void)
+{
+union {
+float f[2];
+double d;
+} a;
+float b;
+
+a.f[0] = 1.2345;
+a.f[1] = 999;
+b = 6.789;
+asm("mdeb %[a],%[b]" : [a] "+f" (a.d) : [b] "R" (b));
+assert(a.d > 8.38 && a.d < 8.39);
+
+a.f[0] = 1.2345;
+a.f[1] = 999;
+b = 6.789;
+asm("mdebr %[a],%[b]" : [a] "+f" (a.d) : [b] "f" (b));
+assert(a.d > 8.38 && a.d < 8.39);
+
+return EXIT_SUCCESS;
+}
-- 
2.41.0

[PATCH 06/12] target/s390x: Fix LRA when DAT is off

2023-07-03 Thread Ilya Leoshkevich

LRA should perform DAT regardless of whether it's on or off.
Disable DAT check for MMU_S390_LRA.

Fixes: defb0e3157af ("s390x: Implement opcode helpers")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Ilya Leoshkevich 
---
 target/s390x/mmu_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/s390x/mmu_helper.c b/target/s390x/mmu_helper.c
index b04b57c2356..fbb2f1b4d48 100644
--- a/target/s390x/mmu_helper.c
+++ b/target/s390x/mmu_helper.c
@@ -417,7 +417,7 @@ int mmu_translate(CPUS390XState *env, target_ulong vaddr, 
int rw, uint64_t asc,
 
 vaddr &= TARGET_PAGE_MASK;
 
-if (!(env->psw.mask & PSW_MASK_DAT)) {
+if (rw != MMU_S390_LRA && !(env->psw.mask & PSW_MASK_DAT)) {
 *raddr = vaddr;
 goto nodat;
 }
-- 
2.41.0

[PATCH 10/12] tests/tcg/s390x: Test LRA

2023-07-03 Thread Ilya Leoshkevich

Add a small test to prevent regressions.

Signed-off-by: Ilya Leoshkevich 
---
 tests/tcg/s390x/Makefile.softmmu-target |  1 +
 tests/tcg/s390x/lra.S   | 19 +++
 2 files changed, 20 insertions(+)
 create mode 100644 tests/tcg/s390x/lra.S

diff --git a/tests/tcg/s390x/Makefile.softmmu-target 
b/tests/tcg/s390x/Makefile.softmmu-target
index 44dfd716291..242c7b0f83c 100644
--- a/tests/tcg/s390x/Makefile.softmmu-target
+++ b/tests/tcg/s390x/Makefile.softmmu-target
@@ -20,6 +20,7 @@ ASM_TESTS =   
 \
 sam
\
 lpsw   
\
 lpswe-early
\
+lra
\
 ssm-early  
\
 stosm-early
\
 unaligned-lowcore
diff --git a/tests/tcg/s390x/lra.S b/tests/tcg/s390x/lra.S
new file mode 100644
index 000..79ab86f36bb
--- /dev/null
+++ b/tests/tcg/s390x/lra.S
@@ -0,0 +1,19 @@
+.org 0x200 /* lowcore padding */
+.globl _start
+_start:
+lgrl %r1,initial_r1
+lra %r1,0(%r1)
+cgrl %r1,expected_r1
+jne 1f
+lpswe success_psw
+1:
+lpswe failure_psw
+.align 8
+initial_r1:
+.quad 0x8765432112345678
+expected_r1:
+.quad 0x8765432180000038   /* ASCE type exception */
+success_psw:
+.quad 0x2000000000000,0xfff    /* see is_special_wait_psw() */
+failure_psw:
+.quad 0x2000000000000,0        /* disabled wait */
-- 
2.41.0

Re: [PATCH 01/24] linux-user: Use assert in mmap_fork_start

2023-07-03 Thread Philippe Mathieu-Daudé


On 30/6/23 15:21, Richard Henderson wrote:

Assert is preferred over if+abort for the error message.

Signed-off-by: Richard Henderson 
---
  linux-user/mmap.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH 02/24] linux-user: Fix formatting of mmap.c

2023-07-03 Thread Philippe Mathieu-Daudé


On 30/6/23 15:21, Richard Henderson wrote:

Fix all checkpatch.pl errors within mmap.c.

Signed-off-by: Richard Henderson 
---
  linux-user/mmap.c | 199 --
  1 file changed, 122 insertions(+), 77 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v2] target/riscv: Optimize ambiguous local variable in pmp_hart_has_privs

2023-07-03 Thread Philippe Mathieu-Daudé


On 3/7/23 16:03, Ruibo Lu wrote:

These two values represent whether the start/end address is in pmp_range.
However, their type and name are ambiguous. This commit changes their
name and type to improve code readability and accuracy.

Signed-off-by: Ruibo Lu 
---
  target/riscv/pmp.c | 20 ++--
  1 file changed, 10 insertions(+), 10 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH] MAINTAINERS: raise status of 9p to 'Maintained'

2023-07-03 Thread Philippe Mathieu-Daudé


On 3/7/23 16:34, Christian Schoenebeck wrote:

Change the status of 9p from 'Odd Fixes' to 'Maintained', as this better
reflects the current situation. I have already been taking care of 9p
patches for a while, which included new features as well.

Based-on: 
Signed-off-by: Christian Schoenebeck 
---
  MAINTAINERS | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)


Reviewed-by: Philippe Mathieu-Daudé

1 2 3 4 >

1 - 100 of 334 matches

Mail list logo