[PATCH v4 0/2] x86/sev: Measured Linux SEV guest with kernel/initrd/cmdline

2021-09-29 Thread Dov Murik
Currently booting with -kernel/-initrd/-append is not supported in SEV
confidential guests, because the content of these blobs is not measured
and therefore not trusted by the SEV guest.

However, in some cases the kernel, initrd, and cmdline are not secret
but should not be modified by the host.  In such a case, we want to
verify inside the trusted VM that the kernel, initrd, and cmdline are
indeed the ones expected by the Guest Owner, and only if that is the
case go on and boot them up (removing the need for grub inside OVMF in
that mode).

To support that, OVMF adds a special area for hashes of
kernel/initrd/cmdline; that area is expected to be filled by QEMU and
encrypted as part of the initial SEV guest launch.  This in turn makes
the hashes part of the AMD PSP measured content, and OVMF can trust
these inputs if they match the hashes.

This series adds an SEV function to generate the table of hashes for
OVMF and encrypt it (patch 1/2), and calls this function if SEV is
enabled when the kernel/initrd/cmdline are prepared (patch 2/2).

Corresponding OVMF support [1] is already available in edk2 (patch series
"Measured SEV boot with kernel/initrd/cmdline").

[1] https://edk2.groups.io/g/devel/message/78250

---

v4 changes:
 - struct and variable renames (KernelLoaderContext -> SevKernelLoaderContext,
   kernel_loader_context -> sev_load_ctx).

v3 resend: 
https://lore.kernel.org/qemu-devel/20210825073538.959525-1-dovmu...@linux.ibm.com/
v3: 
https://lore.kernel.org/qemu-devel/20210624102040.2015280-1-dovmu...@linux.ibm.com/
v3 changes:
 - initrd hash is now mandatory; if no -initrd is passed, calculate the
   hash of the empty buffer.  This is now aligned with the OVMF
   behaviour which verifies the empty initrd (correctly).
 - make SevHashTable entries fixed: 3 entries for cmdline, initrd, and kernel.
 - in sev_add_kernel_loader_hashes: first calculate all the hashes, only then
   fill-in the hashes table in the guest's memory.
 - Use g_assert_not_reached in sev-stub.c.
 - Use QEMU_PACKED attribute for structs.
 - Use QemuUUID type for guids.
 - in sev_add_kernel_loader_hashes: use ARRAY_SIZE(iov) instead of literal 2.

v2: 
https://lore.kernel.org/qemu-devel/20210621190553.1763020-1-dovmu...@linux.ibm.com/
v2 changes:
 - Extract main functionality to sev.c (with empty stub in sev-stub.c)
 - Use sev_enabled() instead of machine->cgs->ready to detect SEV guest
 - Coding style changes

v1: 
https://lore.kernel.org/qemu-devel/20210525065931.1628554-1-dovmu...@linux.ibm.com/


Dov Murik (2):
  sev/i386: Introduce sev_add_kernel_loader_hashes for measured linux
boot
  x86/sev: generate SEV kernel loader hashes in x86_load_linux

 target/i386/sev_i386.h |  12 
 hw/i386/x86.c  |  25 +++-
 target/i386/sev-stub.c |   5 ++
 target/i386/sev.c  | 137 +
 4 files changed, 178 insertions(+), 1 deletion(-)

-- 
2.25.1




[PATCH v4 2/2] x86/sev: generate SEV kernel loader hashes in x86_load_linux

2021-09-29 Thread Dov Murik
If SEV is enabled and a kernel is passed via -kernel, pass the hashes of
kernel/initrd/cmdline in an encrypted guest page to OVMF for SEV
measured boot.

Co-developed-by: James Bottomley 
Signed-off-by: James Bottomley 
Signed-off-by: Dov Murik 
Reviewed-by: Daniel P. Berrangé 
---
 hw/i386/x86.c | 25 -
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 00448ed55a..3f37d17b5c 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -45,6 +45,7 @@
 #include "hw/i386/fw_cfg.h"
 #include "hw/intc/i8259.h"
 #include "hw/rtc/mc146818rtc.h"
+#include "target/i386/sev_i386.h"
 
 #include "hw/acpi/cpu_hotplug.h"
 #include "hw/irq.h"
@@ -778,6 +779,7 @@ void x86_load_linux(X86MachineState *x86ms,
 const char *initrd_filename = machine->initrd_filename;
 const char *dtb_filename = machine->dtb;
 const char *kernel_cmdline = machine->kernel_cmdline;
+SevKernelLoaderContext sev_load_ctx = {};
 
 /* Align to 16 bytes as a paranoia measure */
 cmdline_size = (strlen(kernel_cmdline) + 16) & ~15;
@@ -924,6 +926,8 @@ void x86_load_linux(X86MachineState *x86ms,
 fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
 fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1);
 fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
+sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
+sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1;
 
 if (protocol >= 0x202) {
 stl_p(header + 0x228, cmdline_addr);
@@ -1005,6 +1009,8 @@ void x86_load_linux(X86MachineState *x86ms,
 fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
 fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
 fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size);
+sev_load_ctx.initrd_data = initrd_data;
+sev_load_ctx.initrd_size = initrd_size;
 
 stl_p(header + 0x218, initrd_addr);
 stl_p(header + 0x21c, initrd_size);
@@ -1063,15 +1069,32 @@ void x86_load_linux(X86MachineState *x86ms,
 load_image_size(dtb_filename, setup_data->data, dtb_size);
 }
 
-memcpy(setup, header, MIN(sizeof(header), setup_size));
+/*
+ * If we're starting an encrypted VM, it will be OVMF based, which uses the
+ * efi stub for booting and doesn't require any values to be placed in the
+ * kernel header.  We therefore don't update the header so the hash of the
+ * kernel on the other side of the fw_cfg interface matches the hash of the
+ * file the user passed in.
+ */
+if (!sev_enabled()) {
+memcpy(setup, header, MIN(sizeof(header), setup_size));
+}
 
 fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
 fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
 fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
+sev_load_ctx.kernel_data = (char *)kernel;
+sev_load_ctx.kernel_size = kernel_size;
 
 fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
 fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
 fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
+sev_load_ctx.setup_data = (char *)setup;
+sev_load_ctx.setup_size = setup_size;
+
+if (sev_enabled()) {
+sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal);
+}
 
 option_rom[nb_option_roms].bootindex = 0;
 option_rom[nb_option_roms].name = "linuxboot.bin";
-- 
2.25.1




[PATCH v4 1/2] sev/i386: Introduce sev_add_kernel_loader_hashes for measured linux boot

2021-09-29 Thread Dov Murik
Add the sev_add_kernel_loader_hashes function to calculate the hashes of
the kernel/initrd/cmdline and fill a designated OVMF encrypted hash
table area.  For this to work, OVMF must support an encrypted area to
place the data which is advertised via a special GUID in the OVMF reset
table.

The hash of each of the files is calculated (or of the string in the case
of the cmdline with trailing '\0' included).  Each entry in the hashes
table is GUID identified and since they're passed through the
sev_encrypt_flash interface, the hashes will be accumulated by the AMD
PSP measurement (SEV_LAUNCH_MEASURE).

Co-developed-by: James Bottomley 
Signed-off-by: James Bottomley 
Signed-off-by: Dov Murik 
---
 target/i386/sev_i386.h |  12 
 target/i386/sev-stub.c |   5 ++
 target/i386/sev.c  | 137 +
 3 files changed, 154 insertions(+)

diff --git a/target/i386/sev_i386.h b/target/i386/sev_i386.h
index ae6d840478..2afe108069 100644
--- a/target/i386/sev_i386.h
+++ b/target/i386/sev_i386.h
@@ -28,6 +28,17 @@
 #define SEV_POLICY_DOMAIN   0x10
 #define SEV_POLICY_SEV  0x20
 
+typedef struct SevKernelLoaderContext {
+char *setup_data;
+size_t setup_size;
+char *kernel_data;
+size_t kernel_size;
+char *initrd_data;
+size_t initrd_size;
+char *cmdline_data;
+size_t cmdline_size;
+} SevKernelLoaderContext;
+
 extern bool sev_es_enabled(void);
 extern uint64_t sev_get_me_mask(void);
 extern SevInfo *sev_get_info(void);
@@ -37,5 +48,6 @@ extern char *sev_get_launch_measurement(void);
 extern SevCapability *sev_get_capabilities(Error **errp);
 extern SevAttestationReport *
 sev_get_attestation_report(const char *mnonce, Error **errp);
+extern bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error 
**errp);
 
 #endif
diff --git a/target/i386/sev-stub.c b/target/i386/sev-stub.c
index 0227cb5177..d8e6583171 100644
--- a/target/i386/sev-stub.c
+++ b/target/i386/sev-stub.c
@@ -81,3 +81,8 @@ sev_get_attestation_report(const char *mnonce, Error **errp)
 error_setg(errp, "SEV is not available in this QEMU");
 return NULL;
 }
+
+bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
+{
+g_assert_not_reached();
+}
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 0b2c8f594a..8b98e184c2 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -23,6 +23,7 @@
 #include "qemu/base64.h"
 #include "qemu/module.h"
 #include "qemu/uuid.h"
+#include "crypto/hash.h"
 #include "sysemu/kvm.h"
 #include "sev_i386.h"
 #include "sysemu/sysemu.h"
@@ -83,6 +84,32 @@ typedef struct __attribute__((__packed__)) SevInfoBlock {
 uint32_t reset_addr;
 } SevInfoBlock;
 
+#define SEV_HASH_TABLE_RV_GUID  "7255371f-3a3b-4b04-927b-1da6efa8d454"
+typedef struct QEMU_PACKED SevHashTableDescriptor {
+/* SEV hash table area guest address */
+uint32_t base;
+/* SEV hash table area size (in bytes) */
+uint32_t size;
+} SevHashTableDescriptor;
+
+/* hard code sha256 digest size */
+#define HASH_SIZE 32
+
+typedef struct QEMU_PACKED SevHashTableEntry {
+QemuUUID guid;
+uint16_t len;
+uint8_t hash[HASH_SIZE];
+} SevHashTableEntry;
+
+typedef struct QEMU_PACKED SevHashTable {
+QemuUUID guid;
+uint16_t len;
+SevHashTableEntry cmdline;
+SevHashTableEntry initrd;
+SevHashTableEntry kernel;
+uint8_t padding[];
+} SevHashTable;
+
 static SevGuestState *sev_guest;
 static Error *sev_mig_blocker;
 
@@ -1071,6 +1098,116 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t 
flash_size)
 return 0;
 }
 
+static const QemuUUID sev_hash_table_header_guid = {
+.data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93,
+0xd4, 0x11, 0xfd, 0x21)
+};
+
+static const QemuUUID sev_kernel_entry_guid = {
+.data = UUID_LE(0x4de79437, 0xabd2, 0x427f, 0xb8, 0x35, 0xd5, 0xb1,
+0x72, 0xd2, 0x04, 0x5b)
+};
+static const QemuUUID sev_initrd_entry_guid = {
+.data = UUID_LE(0x44baf731, 0x3a2f, 0x4bd7, 0x9a, 0xf1, 0x41, 0xe2,
+0x91, 0x69, 0x78, 0x1d)
+};
+static const QemuUUID sev_cmdline_entry_guid = {
+.data = UUID_LE(0x97d02dd8, 0xbd20, 0x4c94, 0xaa, 0x78, 0xe7, 0x71,
+0x4d, 0x36, 0xab, 0x2a)
+};
+
+/*
+ * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page
+ * which is included in SEV's initial memory measurement.
+ */
+bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
+{
+uint8_t *data;
+SevHashTableDescriptor *area;
+SevHashTable *ht;
+uint8_t cmdline_hash[HASH_SIZE];
+uint8_t initrd_hash[HASH_SIZE];
+uint8_t kernel_hash[HASH_SIZE];
+uint8_t *hashp;
+size_t hash_len = HASH_SIZE;
+int aligned_len;
+
+if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) {
+error_setg(errp, "SEV: kernel specified but OVMF has no hash table 
guid");
+return false;
+}
+area = (SevHashT

[Bug 1945540] Re: Java crashes on s390x VM with SIGILL/ILL_PRVOPC at '__kernel_getcpu+0x8'

2021-09-29 Thread Thomas Huth
There were some fixes in QEMU v6.1. Please try that one to see whether
it solves your problem, too.

** Project changed: qemu => qemu (Ubuntu)

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1945540

Title:
  Java crashes on s390x VM with SIGILL/ILL_PRVOPC at
  '__kernel_getcpu+0x8'

Status in qemu package in Ubuntu:
  New

Bug description:
  Host environment

  - Operating system: Ubuntu 20.04.3 LTS Desktop
  - OS/kernel version: Linux tower 5.11.0-37-generic #41~20.04.2-Ubuntu
  SMP Fri Sep 24 09:06:38 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
  - Architecture: amd64
  - QEMU flavor: qemu-system-s390x
  - QEMU version: QEMU emulator version 4.2.1 (Debian 1:4.2-3ubuntu6.17)
  - QEMU command line: See attached file 'command-line.txt'

  Emulated/Virtualized environment

  - Operating system: Ubuntu 20.04.3 LTS Server
  - OS/kernel version: Linux s390x-focal 5.4.0-88-generic #99-Ubuntu
  SMP Thu Sep 23 17:27:44 UTC 2021 s390x s390x s390x GNU/Linux
  - Architecture: s390x

  Description of problem

  Java crashes as shown below:

  $ java --version
  #
  # A fatal error has been detected by the Java Runtime Environment:
  #
  #  SIGILL (0x4) at pc=0x03ff9f5fe6f4, pid=6789, tid=6818
  #
  # JRE version:  (17.0+35) (build )
  # Java VM: OpenJDK 64-Bit Server VM (17+35-snap, mixed mode, sharing,
  # tiered, compressed oops, compressed class ptrs, g1 gc, linux-s390x)
  # Problematic frame:
  # C  [linux-vdso64.so.1+0x6f8]  __kernel_getcpu+0x8
  #
  # Core dump will be written. Default location: core.6789 (may not
  # exist)
  #
  # An error report file with more information is saved as:
  # /home/ubuntu/src/hs_err_pid6789.log
  #
  #
  Aborted (core dumped)

  Steps to reproduce

  Run any Java program to reproduce the problem.

  Because the 'openjdk' packages in Ubuntu run the 'java' command during
  installation, they hit the same error and fail to install. As an
  alternative, you can install the OpenJDK Snap package for the 's390x'
  architecture as follows:

$ sudo snap install openjdk

  The OpenJDK Snap package has been tested to work on a real IBM/S390
  8561 system, namely the IBM LinuxONE III LT1 at Marist College:

Marist College Installs World’s First IBM LinuxONE III™
https://www.marist.edu/-/marist-first-linuxone-iii

  Additional information

  See the following attached files:

  command-line.txt - the command-line used to start the virtual machine
  hs_err_pid6789.log - the log file resulting from 'java --version'

To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1945540/+subscriptions




[PULL 43/44] MAINTAINERS: Add information for OpenPIC

2021-09-29 Thread David Gibson
The OpenPIC interrupt controller was once the de facto standard on ppc
machines.  In qemu it's now only used on some Macintosh and the
Freescale e500 machine.  It has no listed maintainer, and as far as I
know, no-one who's really familiar with it any more.

Since I'm moving away from the area, I no longer have capacity to do even
minimal maintenance of it under the auspices of the ppc targets in general.

Therefore, this patch lists the main part of openpic, and marks it as
"Odd Fixes" to be looked after by Mark Cave-Ayland who handles the
Macintosh targets.  The openpic_kvm variant is only used on e500, so
add it to the files for that machine type (itself already Orphaned).

Signed-off-by: David Gibson 
Reviewed-by: Cédric Le Goater 
Reviewed-by: Greg Kurz 
Reviewed-by: Mark Cave-Ayland 
---
 MAINTAINERS | 8 
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index ed9691d65c..688233b44a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1256,6 +1256,8 @@ F: hw/pci-host/ppce500.c
 F: include/hw/ppc/ppc_e500.h
 F: include/hw/pci-host/ppce500.h
 F: pc-bios/u-boot.e500
+F: hw/intc/openpic_kvm.h
+F: include/hw/ppc/openpic_kvm.h
 
 mpc8544ds
 L: qemu-...@nongnu.org
@@ -2258,6 +2260,12 @@ F: net/can/*
 F: hw/net/can/*
 F: include/net/can_*.h
 
+OpenPIC interrupt controller
+M: Mark Cave-Ayland 
+S: Odd Fixes
+F: hw/intc/openpic.c
+F: include/hw/ppc/openpic.h
+
 Subsystems
 --
 Overall Audio backends
-- 
2.31.1




Re: [RFC PATCH 1/1] hw: aspeed_adc: Add initial Aspeed ADC support

2021-09-29 Thread Cédric Le Goater

Hello Peter,

If you run ./scripts/get_maintainer.pl on the patch, it will build
the list of people and mailing lists to send to.

On 9/30/21 02:42, p...@fb.com wrote:

From: Peter Delevoryas 

This change sets up Aspeed SoC ADC emulation, so that most ADC drivers
will pass the initialization sequence and load successfully. In the
future, we can extend this to emulate more features.

The initialization sequence is:

 1. Set `ADC00` to `0xF`.
 2. Wait for bit 8 of `ADC00` to be set.

I also added the sequence for enabling "Auto compensating sensing mode":

 1. Set `ADC00` to `0x2F` (set bit 5).
 2. Wait for bit 5 of `ADC00` to be reset (to zero).
 3. ...
 4. ...

Fuji (AST2600):
   Before:
 [   56.185778] aspeed_adc: probe of 1e6e9000.adc failed with error -110
 [   56.687936] aspeed_adc: probe of 1e6e9100.adc failed with error -110

   After:
 aspeed_adc_read 0x0c read 0x
 aspeed_adc_read 0x0c read 0x
 aspeed_adc_write 0x00 write 0x000f
 aspeed_adc_read 0x00 read 0x010f
 aspeed_adc_read 0x00 read 0x010f
 [   55.885164] aspeed_adc 1e6e9000.adc: trim 8
 aspeed_adc_read 0xc4 read 0x
 aspeed_adc_write 0xc4 write 0x0008
 aspeed_adc_write 0x00 write 0x011f
 aspeed_adc_write 0x00 write 0x1011f
 aspeed_adc_read 0x10 read 0x
 aspeed_adc_write 0x00 write 0x010f
 [   55.886509] aspeed_adc 1e6e9000.adc: cv 512
 aspeed_adc_write 0x00 write 0x010f
 aspeed_adc_read 0x0c read 0x
 aspeed_adc_read 0x0c read 0x
 aspeed_adc_write 0x00 write 0x000f
 aspeed_adc_read 0x00 read 0x010f
 aspeed_adc_read 0x00 read 0x010f
 [   55.890609] aspeed_adc 1e6e9100.adc: trim 8
 aspeed_adc_read 0xc4 read 0x
 aspeed_adc_write 0xc4 write 0x0008
 aspeed_adc_write 0x00 write 0x011f
 aspeed_adc_write 0x00 write 0x1011f
 aspeed_adc_read 0x10 read 0x
 aspeed_adc_write 0x00 write 0x010f
 [   55.891863] aspeed_adc 1e6e9100.adc: cv 512
 aspeed_adc_write 0x00 write 0x010f

YosemiteV2 (AST2500):
   Before:
 [   20.561588] ast_adc ast_adc.0: ast_adc_probe
 [   20.563741] hwmon hwmon0: write offset: c4, val: 8
 [   20.563925] hwmon hwmon0: write offset: c, val: 40
 [   20.564099] hwmon hwmon0: write offset: 0, val: f
 [   21.066110] ast_adc: driver init failed (ret=-110)!
 [   21.066635] ast_adc: probe of ast_adc.0 failed with error -110

   After:
 aspeed_adc_write 0xc4 write 0x0008
 aspeed_adc_write 0x0c write 0x0040
 aspeed_adc_write 0x00 write 0x000f
 aspeed_adc_read 0x00 read 0x010f
 aspeed_adc_write 0x00 write 0x002f
 aspeed_adc_read 0x00 read 0x000f
 aspeed_adc_read 0xc4 read 0x0008
 [   19.602033] ast_adc: driver successfully loaded.



FYI, this series was sent by Andrew in 2017 and I have been keeping
it alive since in the aspeed-x.y branches :

* memory: Support unaligned accesses on aligned-only models
  
https://github.com/legoater/qemu/commit/1960ba6bde27b91edb5336985a9210260a4c8938

  That was requested by Phil I think.

* hw/adc: Add basic Aspeed ADC model
  
https://github.com/legoater/qemu/commit/1eff7b1cf10d1777635f7d2cef8ecb441cc607c4

  This is the initial patch. I added multi-engine support recently
  for the fuji.

* hw/arm: Integrate ADC model into Aspeed SoC
  
https://github.com/legoater/qemu/commit/3052f9d8ccdaf78b753e53574b7e8cc2ee01429f

  That one is trivial.


Overall comments :

I prefer the 'regs' array approach of your proposal.

I think the AspeedADCEngine should appear as a QOM object. Check
the patches above.

To move on, maybe, you could rework the initial series and take
ownership ?


Some more below,



Signed-off-by: Peter Delevoryas 
---
  hw/adc/aspeed_adc.c | 205 
  hw/adc/meson.build  |   1 +
  hw/adc/trace-events |   4 +
  hw/arm/aspeed_ast2600.c |  18 
  hw/arm/aspeed_soc.c |  17 +++
  include/hw/adc/aspeed_adc.h |  48 +
  include/hw/arm/aspeed_soc.h |   5 +
  7 files changed, 298 insertions(+)
  create mode 100644 hw/adc/aspeed_adc.c
  create mode 100644 include/hw/adc/aspeed_adc.h

diff --git a/hw/adc/aspeed_adc.c b/hw/adc/aspeed_adc.c
new file mode 100644
index 00..590936148b
--- /dev/null
+++ b/hw/adc/aspeed_adc.c
@@ -0,0 +1,205 @@
+/*
+ * Aspeed ADC Controller
+ *
+ * Copyright 2021 Facebook, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/adc/aspeed_adc.h"
+#include "hw/

[PULL 35/44] hw/intc: openpic: Drop Raven related codes

2021-09-29 Thread David Gibson
From: Bin Meng 

There is no machine that uses the Motorola MCP750 (aka Raven) model.
Drop the related code.

While we are here, drop the mention of the Intel GW80314 I/O
companion chip in the comments as it has been obsolete for years,
and correct a typo too.

Signed-off-by: Bin Meng 
Message-Id: <20210918032653.646370-2-bin.m...@windriver.com>
Signed-off-by: David Gibson 
---
 hw/intc/openpic.c| 28 +---
 include/hw/ppc/openpic.h | 16 
 2 files changed, 1 insertion(+), 43 deletions(-)

diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c
index 2790c6710a..23eafb32bd 100644
--- a/hw/intc/openpic.c
+++ b/hw/intc/openpic.c
@@ -25,12 +25,8 @@
 /*
  *
  * Based on OpenPic implementations:
- * - Intel GW80314 I/O companion chip developer's manual
  * - Motorola MPC8245 & MPC8540 user manuals.
- * - Motorola MCP750 (aka Raven) programmer manual.
- * - Motorola Harrier programmer manuel
- *
- * Serial interrupts, as implemented in Raven chipset are not supported yet.
+ * - Motorola Harrier programmer manual
  *
  */
 
@@ -1564,28 +1560,6 @@ static void openpic_realize(DeviceState *dev, Error 
**errp)
 
 break;
 
-case OPENPIC_MODEL_RAVEN:
-opp->nb_irqs = RAVEN_MAX_EXT;
-opp->vid = VID_REVISION_1_3;
-opp->vir = VIR_GENERIC;
-opp->vector_mask = 0xFF;
-opp->tfrr_reset = 416;
-opp->ivpr_reset = IVPR_MASK_MASK | IVPR_MODE_MASK;
-opp->idr_reset = 0;
-opp->max_irq = RAVEN_MAX_IRQ;
-opp->irq_ipi0 = RAVEN_IPI_IRQ;
-opp->irq_tim0 = RAVEN_TMR_IRQ;
-opp->brr1 = -1;
-opp->mpic_mode_mask = GCR_MODE_MIXED;
-
-if (opp->nb_cpus != 1) {
-error_setg(errp, "Only UP supported today");
-return;
-}
-
-map_list(opp, list_le, &list_count);
-break;
-
 case OPENPIC_MODEL_KEYLARGO:
 opp->nb_irqs = KEYLARGO_MAX_EXT;
 opp->vid = VID_REVISION_1_2;
diff --git a/include/hw/ppc/openpic.h b/include/hw/ppc/openpic.h
index 74ff44bff0..f89802a15c 100644
--- a/include/hw/ppc/openpic.h
+++ b/include/hw/ppc/openpic.h
@@ -21,7 +21,6 @@ enum {
 
 typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines;
 
-#define OPENPIC_MODEL_RAVEN   0
 #define OPENPIC_MODEL_FSL_MPIC_20 1
 #define OPENPIC_MODEL_FSL_MPIC_42 2
 #define OPENPIC_MODEL_KEYLARGO3
@@ -32,13 +31,6 @@ typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } 
IrqLines;
 #define OPENPIC_MAX_IRQ (OPENPIC_MAX_SRC + OPENPIC_MAX_IPI + \
  OPENPIC_MAX_TMR)
 
-/* Raven */
-#define RAVEN_MAX_CPU  2
-#define RAVEN_MAX_EXT 48
-#define RAVEN_MAX_IRQ 64
-#define RAVEN_MAX_TMR  OPENPIC_MAX_TMR
-#define RAVEN_MAX_IPI  OPENPIC_MAX_IPI
-
 /* KeyLargo */
 #define KEYLARGO_MAX_CPU  4
 #define KEYLARGO_MAX_EXT  64
@@ -49,14 +41,6 @@ typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } 
IrqLines;
 /* Timers don't exist but this makes the code happy... */
 #define KEYLARGO_TMR_IRQ  (KEYLARGO_IPI_IRQ + KEYLARGO_MAX_IPI)
 
-/* Interrupt definitions */
-#define RAVEN_FE_IRQ (RAVEN_MAX_EXT) /* Internal functional IRQ */
-#define RAVEN_ERR_IRQ(RAVEN_MAX_EXT + 1) /* Error IRQ */
-#define RAVEN_TMR_IRQ(RAVEN_MAX_EXT + 2) /* First timer IRQ */
-#define RAVEN_IPI_IRQ(RAVEN_TMR_IRQ + RAVEN_MAX_TMR) /* First IPI IRQ */
-/* First doorbell IRQ */
-#define RAVEN_DBL_IRQ(RAVEN_IPI_IRQ + (RAVEN_MAX_CPU * RAVEN_MAX_IPI))
-
 typedef struct FslMpicInfo {
 int max_ext;
 } FslMpicInfo;
-- 
2.31.1




[PULL 38/44] spapr/xive: Fix kvm_xive_source_reset trace event

2021-09-29 Thread David Gibson
From: Cédric Le Goater 

The trace event was placed in the wrong routine. Move it under
kvmppc_xive_source_reset_one().

Fixes: 4e960974d4ee ("xive: Add trace events")
Signed-off-by: Cédric Le Goater 
Message-Id: <20210922070205.1235943-1-...@kaod.org>
Reviewed-by: Greg Kurz 
Signed-off-by: David Gibson 
---
 hw/intc/spapr_xive_kvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 3e534b9685..6d4909d0a8 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -236,6 +236,8 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int 
srcno, Error **errp)
 SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
 uint64_t state = 0;
 
+trace_kvm_xive_source_reset(srcno);
+
 assert(xive->fd != -1);
 
 if (xive_source_irq_is_lsi(xsrc, srcno)) {
@@ -311,8 +313,6 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, 
uint32_t offset,
 return xive_esb_rw(xsrc, srcno, offset, data, 1);
 }
 
-trace_kvm_xive_source_reset(srcno);
-
 /*
  * Special Load EOI handling for LSI sources. Q bit is never set
  * and the interrupt should be re-triggered if the level is still
-- 
2.31.1




Re: [PATCH v3 0/3] monitor: Rate-limit MEMORY_DEVICE_SIZE_CHANGE qapi events per device

2021-09-29 Thread Markus Armbruster
David Hildenbrand  writes:

> Some fixes for virtio-mem-pci, to properly handle MEMORY_DEVICE_SIZE_CHANGE
> events, especially not dropping events of some devices when rate-limiting.

Series
Reviewed-by: Markus Armbruster 




[PULL 34/44] hw/intc: openpic: Correct the reset value of IPIDR for FSL chipset

2021-09-29 Thread David Gibson
From: Bin Meng 

The reset value of IPIDR should be zero for Freescale chipset, per
the following 2 manuals I checked:

- P2020RM (https://www.nxp.com/webapp/Download?colCode=P2020RM)
- P4080RM (https://www.nxp.com/webapp/Download?colCode=P4080RM)

Currently it is set to 1, which leaves the IPI enabled on core 0
after power-on reset. This may cause an unexpected interrupt to be
delivered to core 0 if the IPI is triggered from core 0 to other
cores later.

Fixes: ffd5e9fe0276 ("openpic: Reset IRQ source private members")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/584
Signed-off-by: Bin Meng 
Message-Id: <20210918032653.646370-1-bin.m...@windriver.com>
Signed-off-by: David Gibson 
---
 hw/intc/openpic.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c
index 9b4c17854d..2790c6710a 100644
--- a/hw/intc/openpic.c
+++ b/hw/intc/openpic.c
@@ -1276,6 +1276,15 @@ static void openpic_reset(DeviceState *d)
 break;
 }
 
+/* Mask all IPI interrupts for Freescale OpenPIC */
+if ((opp->model == OPENPIC_MODEL_FSL_MPIC_20) ||
+(opp->model == OPENPIC_MODEL_FSL_MPIC_42)) {
+if (i >= opp->irq_ipi0 && i < opp->irq_tim0) {
+write_IRQreg_idr(opp, i, 0);
+continue;
+}
+}
+
 write_IRQreg_idr(opp, i, opp->idr_reset);
 }
 /* Initialise IRQ destinations */
-- 
2.31.1




[PULL 44/44] MAINTAINERS: Demote sPAPR from "Supported" to "Maintained"

2021-09-29 Thread David Gibson
qemu/KVM on Power is no longer my primary job responsibility, nor Greg
Kurz'.  I still have some time for upstream maintenance, but it's no longer
accurate to say that I'm paid to do so.  Therefore, reduce sPAPR (the
"pseries" machine type) from Supported to Maintained.

Signed-off-by: David Gibson 
Reviewed-by: Greg Kurz 
Reviewed-by: Cédric Le Goater 
Reviewed-by: Philippe Mathieu-Daudé 
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 688233b44a..50435b8d2f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1317,7 +1317,7 @@ sPAPR
 M: David Gibson 
 M: Greg Kurz 
 L: qemu-...@nongnu.org
-S: Supported
+S: Maintained
 F: hw/*/spapr*
 F: include/hw/*/spapr*
 F: hw/*/xics*
-- 
2.31.1




[PULL 33/44] target/ppc: Fix 64-bit decrementer

2021-09-29 Thread David Gibson
From: Cédric Le Goater 

The current way the mask is built can overflow with a 64-bit decrementer.
Use sextract64() to extract the signed values and remove the logic to
handle negative values which has become useless.

Cc: Luis Fernando Fujita Pires 
Fixes: a8dafa525181 ("target/ppc: Implement large decrementer support for TCG")
Signed-off-by: Cédric Le Goater 
Message-Id: <20210920061203.989563-5-...@kaod.org>
Reviewed-by: Luis Pires 
Signed-off-by: David Gibson 
---
 hw/ppc/ppc.c | 20 +---
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index b813ef732e..f5d012f860 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -821,14 +821,12 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, 
uint64_t *nextp,
 CPUPPCState *env = &cpu->env;
 ppc_tb_t *tb_env = env->tb_env;
 uint64_t now, next;
-bool negative;
+int64_t signed_value;
+int64_t signed_decr;
 
 /* Truncate value to decr_width and sign extend for simplicity */
-value &= ((1ULL << nr_bits) - 1);
-negative = !!(value & (1ULL << (nr_bits - 1)));
-if (negative) {
-value |= (0xULL << nr_bits);
-}
+signed_value = sextract64(value, 0, nr_bits);
+signed_decr = sextract64(decr, 0, nr_bits);
 
 trace_ppc_decr_store(nr_bits, decr, value);
 
@@ -850,16 +848,16 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, 
uint64_t *nextp,
  * On MSB edge based DEC implementations the MSB going from 0 -> 1 triggers
  * an edge interrupt, so raise it here too.
  */
-if ((value < 3) ||
-((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && negative) ||
-((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && negative
-  && !(decr & (1ULL << (nr_bits - 1) {
+if ((signed_value < 3) ||
+((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && signed_value < 0) ||
+((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && signed_value < 0
+  && signed_decr >= 0)) {
 (*raise_excp)(cpu);
 return;
 }
 
 /* On MSB level based systems a 0 for the MSB stops interrupt delivery */
-if (!negative && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) {
+if (signed_value >= 0 && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) {
 (*lower_excp)(cpu);
 }
 
-- 
2.31.1




[PULL 28/44] spapr_numa.c: rename numa_assoc_array to FORM1_assoc_array

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

Introducing a new NUMA affinity, FORM2, requires a new mechanism to
switch between affinity modes after CAS. Also, we want FORM2 data
structures and functions to be completely separated from the existing
FORM1 code, allowing us to avoid adding new code that inherits the
existing complexity of FORM1.

The idea of switching values used by the write_dt() functions in
spapr_numa.c was already introduced in the previous patch, and
the same approach will be used when dealing with the FORM1 and FORM2
arrays.

We can accomplish that by renaming the existing numa_assoc_array
to FORM1_assoc_array, which now is used exclusively to handle FORM1 affinity
data. A new helper get_associativity() is then introduced to be used by the
write_dt() functions to retrieve the current ibm,associativity array of
a given node, after considering affinity selection that might have been
done during CAS. All code that was using numa_assoc_array now needs to
retrieve the array by calling this function.

This will allow for an easier plug of FORM2 data later on.

Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210920174947.556324-5-danielhb...@gmail.com>
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_hcall.c   |  1 +
 hw/ppc/spapr_numa.c| 38 +-
 include/hw/ppc/spapr.h |  2 +-
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 0e9a5b2e40..9056644890 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -17,6 +17,7 @@
 #include "kvm_ppc.h"
 #include "hw/ppc/fdt.h"
 #include "hw/ppc/spapr_ovec.h"
+#include "hw/ppc/spapr_numa.h"
 #include "mmu-book3s-v3.h"
 #include "hw/mem/memory-device.h"
 
diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index 08e2d6aed8..dce9ce987a 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -46,6 +46,15 @@ static int get_vcpu_assoc_size(SpaprMachineState *spapr)
 return get_numa_assoc_size(spapr) + 1;
 }
 
+/*
+ * Retrieves the ibm,associativity array of NUMA node 'node_id'
+ * for the current NUMA affinity.
+ */
+static const uint32_t *get_associativity(SpaprMachineState *spapr, int node_id)
+{
+return spapr->FORM1_assoc_array[node_id];
+}
+
 static bool spapr_numa_is_symmetrical(MachineState *ms)
 {
 int src, dst;
@@ -124,7 +133,7 @@ static void 
spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
  */
 for (i = 1; i < nb_numa_nodes; i++) {
 for (j = 1; j < FORM1_DIST_REF_POINTS; j++) {
-spapr->numa_assoc_array[i][j] = cpu_to_be32(i);
+spapr->FORM1_assoc_array[i][j] = cpu_to_be32(i);
 }
 }
 
@@ -176,8 +185,8 @@ static void 
spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
  * and going up to 0x1.
  */
 for (i = n_level; i > 0; i--) {
-assoc_src = spapr->numa_assoc_array[src][i];
-spapr->numa_assoc_array[dst][i] = assoc_src;
+assoc_src = spapr->FORM1_assoc_array[src][i];
+spapr->FORM1_assoc_array[dst][i] = assoc_src;
 }
 }
 }
@@ -204,8 +213,8 @@ static void 
spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
  * 'i' will be a valid node_id set by the user.
  */
 for (i = 0; i < nb_numa_nodes; i++) {
-spapr->numa_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
-spapr->numa_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
+spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
+spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
 }
 
 /*
@@ -219,15 +228,15 @@ static void 
spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
 max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM;
 
 for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) {
-spapr->numa_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
+spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
 
 for (j = 1; j < FORM1_DIST_REF_POINTS; j++) {
 uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ?
  SPAPR_GPU_NUMA_ID : cpu_to_be32(i);
-spapr->numa_assoc_array[i][j] = gpu_assoc;
+spapr->FORM1_assoc_array[i][j] = gpu_assoc;
 }
 
-spapr->numa_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
+spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
 }
 
 /*
@@ -259,14 +268,17 @@ void spapr_numa_associativity_init(SpaprMachineState 
*spapr,
 void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
int offset, int nodeid)
 {
+const uint32_t *associativity = get_associativity(spapr, nodeid);
+
 _FDT((fdt_setprop(fdt, offset, "ibm,associativity",
-  spapr->numa_assoc_array[nodeid],
+  associativity,

[PULL 41/44] MAINTAINERS: Orphan obscure ppc platforms

2021-09-29 Thread David Gibson
There are a number of old embedded ppc machine types which have been little
changed and in "Odd Fixes" state for a long time.  With both myself and
Greg Kurz moving toward other areas, we no longer have the capacity to
keep reviewing and maintaining even the rare patches that come in for those
platforms.

Therefore, remove our names as reviewers and mark these platforms as
orphaned.

Signed-off-by: David Gibson 
Reviewed-by: Greg Kurz 
Reviewed-by: Cédric Le Goater 
---
 MAINTAINERS | 19 +--
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4d547e5604..a79543a877 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1236,24 +1236,18 @@ F: hw/openrisc/openrisc_sim.c
 PowerPC Machines
 
 405
-M: David Gibson 
-M: Greg Kurz 
 L: qemu-...@nongnu.org
-S: Odd Fixes
+S: Orphan
 F: hw/ppc/ppc405_boards.c
 
 Bamboo
-M: David Gibson 
-M: Greg Kurz 
 L: qemu-...@nongnu.org
-S: Odd Fixes
+S: Orphan
 F: hw/ppc/ppc440_bamboo.c
 
 e500
-M: David Gibson 
-M: Greg Kurz 
 L: qemu-...@nongnu.org
-S: Odd Fixes
+S: Orphan
 F: hw/ppc/e500*
 F: hw/gpio/mpc8xxx.c
 F: hw/i2c/mpc_i2c.c
@@ -1264,10 +1258,8 @@ F: include/hw/pci-host/ppce500.h
 F: pc-bios/u-boot.e500
 
 mpc8544ds
-M: David Gibson 
-M: Greg Kurz 
 L: qemu-...@nongnu.org
-S: Odd Fixes
+S: Orphan
 F: hw/ppc/mpc8544ds.c
 F: hw/ppc/mpc8544_guts.c
 F: tests/acceptance/ppc_mpc8544ds.py
@@ -1777,9 +1769,8 @@ F: include/hw/acpi/ghes.h
 F: docs/specs/acpi_hest_ghes.rst
 
 ppc4xx
-M: David Gibson 
 L: qemu-...@nongnu.org
-S: Odd Fixes
+S: Orphan
 F: hw/ppc/ppc4*.c
 F: hw/i2c/ppc4xx_i2c.c
 F: include/hw/ppc/ppc4xx.h
-- 
2.31.1




[PULL 30/44] spapr_numa.c: FORM2 NUMA affinity support

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

The main feature of FORM2 affinity support is the separation of NUMA
distances from ibm,associativity information. This allows for a more
flexible and straightforward NUMA distance assignment without relying on
complex associations between several levels of NUMA via
ibm,associativity matches. Another feature is its extensibility. This base
support contains the facilities for NUMA distance assignment, but in the
future more facilities will be added for latency, performance, bandwidth
and so on.

This patch implements the base FORM2 affinity support as follows:

- the use of FORM2 associativity is indicated by using bit 2 of byte 5
of ibm,architecture-vec-5. A FORM2 aware guest can choose to use FORM1
or FORM2 affinity. Setting both forms will default to FORM2. We're not
advertising FORM2 for pseries-6.1 and older machine versions to prevent
guest visible changes in those;

- ibm,associativity-reference-points has a new semantic. Instead of
being used to calculate distances via NUMA levels, it's now used to
indicate the primary domain index in the ibm,associativity domain of
each resource. In our case it's set to {0x4}, matching the position
where we already place logical_domain_id;

- two new RTAS DT artifacts are introduced: ibm,numa-lookup-index-table
and ibm,numa-distance-table. The index table is used to list all the
NUMA logical domains of the platform, in ascending order, and allows for
sparse NUMA configurations (although QEMU ATM doesn't support that).
ibm,numa-distance-table is an array that contains all the distances from
the first NUMA node to all other nodes, then the second NUMA node
distances to all other nodes and so on;

- get_max_dist_ref_points(), get_numa_assoc_size() and get_associativity()
now check for OV5_FORM2_AFFINITY and return FORM2 values if the guest
selected FORM2 affinity during CAS.

Reviewed-by: Greg Kurz 
Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210920174947.556324-7-danielhb...@gmail.com>
Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c  |   8 ++
 hw/ppc/spapr_numa.c | 146 
 include/hw/ppc/spapr.h  |   9 +++
 include/hw/ppc/spapr_ovec.h |   1 +
 4 files changed, 164 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 524951def1..b7bee5f4ff 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2753,6 +2753,11 @@ static void spapr_machine_init(MachineState *machine)
 
 spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY);
 
+/* Do not advertise FORM2 NUMA support for pseries-6.1 and older */
+if (!smc->pre_6_2_numa_affinity) {
+spapr_ovec_set(spapr->ov5, OV5_FORM2_AFFINITY);
+}
+
 /* advertise support for dedicated HP event source to guests */
 if (spapr->use_hotplug_event_source) {
 spapr_ovec_set(spapr->ov5, OV5_HP_EVT);
@@ -4675,8 +4680,11 @@ DEFINE_SPAPR_MACHINE(6_2, "6.2", true);
  */
 static void spapr_machine_6_1_class_options(MachineClass *mc)
 {
+SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
 spapr_machine_6_2_class_options(mc);
 compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len);
+smc->pre_6_2_numa_affinity = true;
 }
 
 DEFINE_SPAPR_MACHINE(6_1, "6.1", false);
diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index 6718c0fdd1..13db321997 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -24,6 +24,10 @@
  */
 static int get_max_dist_ref_points(SpaprMachineState *spapr)
 {
+if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
+return FORM2_DIST_REF_POINTS;
+}
+
 return FORM1_DIST_REF_POINTS;
 }
 
@@ -32,6 +36,10 @@ static int get_max_dist_ref_points(SpaprMachineState *spapr)
  */
 static int get_numa_assoc_size(SpaprMachineState *spapr)
 {
+if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
+return FORM2_NUMA_ASSOC_SIZE;
+}
+
 return FORM1_NUMA_ASSOC_SIZE;
 }
 
@@ -52,6 +60,9 @@ static int get_vcpu_assoc_size(SpaprMachineState *spapr)
  */
 static const uint32_t *get_associativity(SpaprMachineState *spapr, int node_id)
 {
+if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
+return spapr->FORM2_assoc_array[node_id];
+}
 return spapr->FORM1_assoc_array[node_id];
 }
 
@@ -295,14 +306,50 @@ static void 
spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
 spapr_numa_define_FORM1_domains(spapr);
 }
 
+/*
+ * Init NUMA FORM2 machine state data
+ */
+static void spapr_numa_FORM2_affinity_init(SpaprMachineState *spapr)
+{
+int i;
+
+/*
+ * For all resources but CPUs, FORM2 associativity arrays will
+ * be a size 2 array with the following format:
+ *
+ * ibm,associativity = {1, numa_id}
+ *
+ * CPUs will write an additional 'vcpu_id' on top of the arrays
+ * being initialized here. 'numa_id' is represented by the
+ * index 'i' of the loop.
+ *
+ * Given that this initialization is also valid for GPU associativity
+ * arrays

[PULL 42/44] MAINTAINERS: Remove David & Greg as reviewers/co-maintainers of powernv

2021-09-29 Thread David Gibson
With our interests moving to other areas, Greg and myself no longer have
capacity to be regular reviewers of code for the powernv machine type, let
alone co-maintainers.  Additionally, not being IBM employees, we don't have
easy access to the hardware information we'd need for good review.

Therefore, remove our names as reviewers and/or co-maintainers of the
powernv machine type, and the related XIVE interrupt controller.

Signed-off-by: David Gibson 
Reviewed-by: Greg Kurz 
Acked-by: Cédric Le Goater 
---
 MAINTAINERS | 4 
 1 file changed, 4 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index a79543a877..ed9691d65c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1331,8 +1331,6 @@ F: tests/acceptance/ppc_pseries.py
 
 PowerNV (Non-Virtualized)
 M: Cédric Le Goater 
-M: David Gibson 
-M: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Maintained
 F: hw/ppc/pnv*
@@ -2225,8 +2223,6 @@ T: git https://github.com/philmd/qemu.git fw_cfg-next
 
 XIVE
 M: Cédric Le Goater 
-R: David Gibson 
-R: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Supported
 F: hw/*/*xive*
-- 
2.31.1




[PULL 32/44] target/ppc: Convert debug to trace events (decrementer and IRQ)

2021-09-29 Thread David Gibson
From: Cédric Le Goater 

Signed-off-by: Cédric Le Goater 
Message-Id: <20210920061203.989563-4-...@kaod.org>
Signed-off-by: David Gibson 
---
 hw/ppc/ppc.c| 169 
 hw/ppc/trace-events |  22 +-
 2 files changed, 82 insertions(+), 109 deletions(-)

diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index a327206a0a..b813ef732e 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -37,22 +37,6 @@
 #include "migration/vmstate.h"
 #include "trace.h"
 
-//#define PPC_DEBUG_IRQ
-//#define PPC_DEBUG_TB
-
-#ifdef PPC_DEBUG_IRQ
-#  define LOG_IRQ(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__)
-#else
-#  define LOG_IRQ(...) do { } while (0)
-#endif
-
-
-#ifdef PPC_DEBUG_TB
-#  define LOG_TB(...) qemu_log(__VA_ARGS__)
-#else
-#  define LOG_TB(...) do { } while (0)
-#endif
-
 static void cpu_ppc_tb_stop (CPUPPCState *env);
 static void cpu_ppc_tb_start (CPUPPCState *env);
 
@@ -86,9 +70,8 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level)
 }
 
 
-LOG_IRQ("%s: %p n_IRQ %d level %d => pending %08" PRIx32
-"req %08x\n", __func__, env, n_IRQ, level,
-env->pending_interrupts, CPU(cpu)->interrupt_request);
+trace_ppc_irq_set_exit(env, n_IRQ, level, env->pending_interrupts,
+   CPU(cpu)->interrupt_request);
 
 if (locked) {
 qemu_mutex_unlock_iothread();
@@ -102,8 +85,8 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level)
 CPUPPCState *env = &cpu->env;
 int cur_level;
 
-LOG_IRQ("%s: env %p pin %d level %d\n", __func__,
-env, pin, level);
+trace_ppc_irq_set(env, pin, level);
+
 cur_level = (env->irq_input_state >> pin) & 1;
 /* Don't generate spurious events */
 if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) {
@@ -112,8 +95,7 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level)
 switch (pin) {
 case PPC6xx_INPUT_TBEN:
 /* Level sensitive - active high */
-LOG_IRQ("%s: %s the time base\n",
-__func__, level ? "start" : "stop");
+trace_ppc_irq_set_state("time base", level);
 if (level) {
 cpu_ppc_tb_start(env);
 } else {
@@ -122,14 +104,12 @@ static void ppc6xx_set_irq(void *opaque, int pin, int 
level)
 break;
 case PPC6xx_INPUT_INT:
 /* Level sensitive - active high */
-LOG_IRQ("%s: set the external IRQ state to %d\n",
-__func__, level);
+trace_ppc_irq_set_state("external IRQ", level);
 ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
 break;
 case PPC6xx_INPUT_SMI:
 /* Level sensitive - active high */
-LOG_IRQ("%s: set the SMI IRQ state to %d\n",
-__func__, level);
+trace_ppc_irq_set_state("SMI IRQ", level);
 ppc_set_irq(cpu, PPC_INTERRUPT_SMI, level);
 break;
 case PPC6xx_INPUT_MCP:
@@ -138,8 +118,7 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level)
  *603/604/740/750: check HID0[EMCP]
  */
 if (cur_level == 1 && level == 0) {
-LOG_IRQ("%s: raise machine check state\n",
-__func__);
+trace_ppc_irq_set_state("machine check", 1);
 ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1);
 }
 break;
@@ -148,20 +127,19 @@ static void ppc6xx_set_irq(void *opaque, int pin, int 
level)
 /* XXX: TODO: relay the signal to CKSTP_OUT pin */
 /* XXX: Note that the only way to restart the CPU is to reset it */
 if (level) {
-LOG_IRQ("%s: stop the CPU\n", __func__);
+trace_ppc_irq_cpu("stop");
 cs->halted = 1;
 }
 break;
 case PPC6xx_INPUT_HRESET:
 /* Level sensitive - active low */
 if (level) {
-LOG_IRQ("%s: reset the CPU\n", __func__);
+trace_ppc_irq_reset("CPU");
 cpu_interrupt(cs, CPU_INTERRUPT_RESET);
 }
 break;
 case PPC6xx_INPUT_SRESET:
-LOG_IRQ("%s: set the RESET IRQ state to %d\n",
-__func__, level);
+trace_ppc_irq_set_state("RESET IRQ", level);
 ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level);
 break;
 default:
@@ -190,8 +168,8 @@ static void ppc970_set_irq(void *opaque, int pin, int level)
 CPUPPCState *env = &cpu->env;
 int cur_level;
 
-LOG_IRQ("%s: env %p pin %d level %d\n", __func__,
-env, pin, level);
+trace_ppc_irq_set(env, pin, level);
+
 cur_level = (env->irq_input_state >> pin) & 1;
 /* Don't generate spurious events */
 if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) {
@@ -200,14 +178,12 @@ static vo

[PULL 37/44] spapr_numa.c: fixes in spapr_numa_FORM2_write_rtas_tables()

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

This patch has a handful of modifications for the recently added
FORM2 support:

- do not allocate more than the necessary size in 'distance_table'.
At this moment the array is oversized due to allocating uint32_t for
all elements, when most of them fit in a uint8_t. Fix it by
changing the array to uint8_t and allocating the exact size;

- use stl_be_p() to store the uint32_t at the start of 'distance_table';

- use sizeof(uint32_t) to skip the uint32_t length when populating the
distances;

- use the NUMA_DISTANCE_MIN macro from sysemu/numa.h to avoid hardcoding
the local distance value.

Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210922122852.130054-2-danielhb...@gmail.com>
Reviewed-by: Greg Kurz 
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_numa.c | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index 58d5dc7084..5822938448 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -502,9 +502,8 @@ static void 
spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
 int nb_numa_nodes = ms->numa_state->num_nodes;
 int distance_table_entries = nb_numa_nodes * nb_numa_nodes;
 g_autofree uint32_t *lookup_index_table = NULL;
-g_autofree uint32_t *distance_table = NULL;
+g_autofree uint8_t *distance_table = NULL;
 int src, dst, i, distance_table_size;
-uint8_t *node_distances;
 
 /*
  * ibm,numa-lookup-index-table: array with length and a
@@ -531,11 +530,13 @@ static void 
spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
  * array because NUMA ids can be sparse (node 0 is the first,
  * node 8 is the second ...).
  */
-distance_table = g_new0(uint32_t, distance_table_entries + 1);
-distance_table[0] = cpu_to_be32(distance_table_entries);
+distance_table_size = distance_table_entries * sizeof(uint8_t) +
+  sizeof(uint32_t);
+distance_table = g_new0(uint8_t, distance_table_size);
+stl_be_p(distance_table, distance_table_entries);
 
-node_distances = (uint8_t *)&distance_table[1];
-i = 0;
+/* Skip the uint32_t array length at the start */
+i = sizeof(uint32_t);
 
 for (src = 0; src < nb_numa_nodes; src++) {
 for (dst = 0; dst < nb_numa_nodes; dst++) {
@@ -546,16 +547,14 @@ static void 
spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
  * adding the numa_info to retrieve distance info from.
  */
 if (src == dst) {
-node_distances[i++] = 10;
+distance_table[i++] = NUMA_DISTANCE_MIN;
 continue;
 }
 
-node_distances[i++] = numa_info[src].distance[dst];
+distance_table[i++] = numa_info[src].distance[dst];
 }
 }
 
-distance_table_size = distance_table_entries * sizeof(uint8_t) +
-  sizeof(uint32_t);
 _FDT(fdt_setprop(fdt, rtas, "ibm,numa-distance-table",
  distance_table, distance_table_size));
 }
-- 
2.31.1




[PULL 36/44] hw/intc: openpic: Clean up the styles

2021-09-29 Thread David Gibson
From: Bin Meng 

Correct the multi-line comment format. No functional changes.

Signed-off-by: Bin Meng 

Message-Id: <20210918032653.646370-3-bin.m...@windriver.com>
Signed-off-by: David Gibson 
---
 hw/intc/openpic.c| 55 +---
 include/hw/ppc/openpic.h |  9 ---
 2 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c
index 23eafb32bd..49504e740f 100644
--- a/hw/intc/openpic.c
+++ b/hw/intc/openpic.c
@@ -47,7 +47,7 @@
 #include "qemu/timer.h"
 #include "qemu/error-report.h"
 
-//#define DEBUG_OPENPIC
+/* #define DEBUG_OPENPIC */
 
 #ifdef DEBUG_OPENPIC
 static const int debug_openpic = 1;
@@ -118,7 +118,8 @@ static FslMpicInfo fsl_mpic_42 = {
 #define ILR_INTTGT_CINT   0x01 /* critical */
 #define ILR_INTTGT_MCP0x02 /* machine check */
 
-/* The currently supported INTTGT values happen to be the same as QEMU's
+/*
+ * The currently supported INTTGT values happen to be the same as QEMU's
  * openpic output codes, but don't depend on this.  The output codes
  * could change (unlikely, but...) or support could be added for
  * more INTTGT values.
@@ -177,10 +178,11 @@ static void openpic_cpu_write_internal(void *opaque, 
hwaddr addr,
uint32_t val, int idx);
 static void openpic_reset(DeviceState *d);
 
-/* Convert between openpic clock ticks and nanosecs.  In the hardware the clock
-   frequency is driven by board inputs to the PIC which the PIC would then
-   divide by 4 or 8.  For now hard code to 25MZ.
-*/
+/*
+ * Convert between openpic clock ticks and nanosecs.  In the hardware the clock
+ * frequency is driven by board inputs to the PIC which the PIC would then
+ * divide by 4 or 8.  For now hard code to 25MZ.
+ */
 #define OPENPIC_TIMER_FREQ_MHZ 25
 #define OPENPIC_TIMER_NS_PER_TICK (1000 / OPENPIC_TIMER_FREQ_MHZ)
 static inline uint64_t ns_to_ticks(uint64_t ns)
@@ -253,7 +255,8 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, 
int n_IRQ,
 __func__, src->output, n_IRQ, active, was_active,
 dst->outputs_active[src->output]);
 
-/* On Freescale MPIC, critical interrupts ignore priority,
+/*
+ * On Freescale MPIC, critical interrupts ignore priority,
  * IACK, EOI, etc.  Before MPIC v4.1 they also ignore
  * masking.
  */
@@ -276,7 +279,8 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, 
int n_IRQ,
 
 priority = IVPR_PRIORITY(src->ivpr);
 
-/* Even if the interrupt doesn't have enough priority,
+/*
+ * Even if the interrupt doesn't have enough priority,
  * it is still raised, in case ctpr is lowered later.
  */
 if (active) {
@@ -408,7 +412,8 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int 
level)
 }
 
 if (src->output != OPENPIC_OUTPUT_INT) {
-/* Edge-triggered interrupts shouldn't be used
+/*
+ * Edge-triggered interrupts shouldn't be used
  * with non-INT delivery, but just in case,
  * try to make it do something sane rather than
  * cause an interrupt storm.  This is close to
@@ -501,7 +506,8 @@ static inline void write_IRQreg_ivpr(OpenPICState *opp, int 
n_IRQ, uint32_t val)
 {
 uint32_t mask;
 
-/* NOTE when implementing newer FSL MPIC models: starting with v4.0,
+/*
+ * NOTE when implementing newer FSL MPIC models: starting with v4.0,
  * the polarity bit is read-only on internal interrupts.
  */
 mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
@@ -511,7 +517,8 @@ static inline void write_IRQreg_ivpr(OpenPICState *opp, int 
n_IRQ, uint32_t val)
 opp->src[n_IRQ].ivpr =
 (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
 
-/* For FSL internal interrupts, The sense bit is reserved and zero,
+/*
+ * For FSL internal interrupts, The sense bit is reserved and zero,
  * and the interrupt is always level-triggered.  Timers and IPIs
  * have no sense or polarity bits, and are edge-triggered.
  */
@@ -695,16 +702,20 @@ static void qemu_timer_cb(void *opaque)
 openpic_set_irq(opp, n_IRQ, 0);
 }
 
-/* If enabled is true, arranges for an interrupt to be raised val clocks into
-   the future, if enabled is false cancels the timer. */
+/*
+ * If enabled is true, arranges for an interrupt to be raised val clocks into
+ * the future, if enabled is false cancels the timer.
+ */
 static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t val, bool enabled)
 {
 uint64_t ns = ticks_to_ns(val & ~TCCR_TOG);
-/* A count of zero causes a timer to be set to expire immediately.  This
-   effectively stops the simulation since the timer is constantly expiring
-   which prevents guest code execution, so we don't honor that
-   configuration.  On real hardware, this situation would generate an
-   interrupt on every clock cycle if the interrupt was

[PULL 40/44] MAINTAINERS: Remove David & Greg as reviewers for a number of boards

2021-09-29 Thread David Gibson
Greg and I are moving towards other areas and no longer have capacity to
act as regular reviewers for several of the secondary ppc machine types.
So, remove ourselves as reviewers for Macintosh, PReP, sam460ex and
pegasos2 in MAINTAINERS.

Signed-off-by: David Gibson 
Reviewed-by: Greg Kurz 
Acked-by: BALATON Zoltan 
Reviewed-by: Cédric Le Goater 
---
 MAINTAINERS | 9 -
 1 file changed, 9 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index b57a4c733b..4d547e5604 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1274,8 +1274,6 @@ F: tests/acceptance/ppc_mpc8544ds.py
 
 New World (mac99)
 M: Mark Cave-Ayland 
-R: David Gibson 
-R: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Odd Fixes
 F: hw/ppc/mac_newworld.c
@@ -1294,8 +1292,6 @@ F: pc-bios/qemu_vga.ndrv
 
 Old World (g3beige)
 M: Mark Cave-Ayland 
-R: David Gibson 
-R: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Odd Fixes
 F: hw/ppc/mac_oldworld.c
@@ -1309,8 +1305,6 @@ F: pc-bios/qemu_vga.ndrv
 
 PReP
 M: Hervé Poussineau 
-R: David Gibson 
-R: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Maintained
 F: hw/ppc/prep.c
@@ -1367,8 +1361,6 @@ F: tests/acceptance/ppc_virtex_ml507.py
 
 sam460ex
 M: BALATON Zoltan 
-R: David Gibson 
-R: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Maintained
 F: hw/ppc/sam460ex.c
@@ -1382,7 +1374,6 @@ F: roms/u-boot-sam460ex
 
 pegasos2
 M: BALATON Zoltan 
-R: David Gibson 
 L: qemu-...@nongnu.org
 S: Maintained
 F: hw/ppc/pegasos2.c
-- 
2.31.1




[PULL 29/44] spapr: move FORM1 verifications to post CAS

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

FORM2 NUMA affinity is prepared to deal with empty (memory/cpu less)
NUMA nodes. This is used by the DAX KMEM driver to locate a PAPR SCM
device that has a different latency than the original NUMA node from the
regular memory. FORM2 is also able to deal with asymmetric NUMA
distances gracefully, something that our FORM1 implementation doesn't
do.

Move these FORM1 verifications to a new function and wait until after
CAS, when we're sure that we're sticking with FORM1, to enforce them.

Reviewed-by: Greg Kurz 
Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210920174947.556324-6-danielhb...@gmail.com>
Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c  | 33 ---
 hw/ppc/spapr_hcall.c|  6 +
 hw/ppc/spapr_numa.c | 53 -
 include/hw/ppc/spapr_numa.h |  1 +
 4 files changed, 54 insertions(+), 39 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 270106975b..524951def1 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2774,39 +2774,6 @@ static void spapr_machine_init(MachineState *machine)
 /* init CPUs */
 spapr_init_cpus(spapr);
 
-/*
- * check we don't have a memory-less/cpu-less NUMA node
- * Firmware relies on the existing memory/cpu topology to provide the
- * NUMA topology to the kernel.
- * And the linux kernel needs to know the NUMA topology at start
- * to be able to hotplug CPUs later.
- */
-if (machine->numa_state->num_nodes) {
-for (i = 0; i < machine->numa_state->num_nodes; ++i) {
-/* check for memory-less node */
-if (machine->numa_state->nodes[i].node_mem == 0) {
-CPUState *cs;
-int found = 0;
-/* check for cpu-less node */
-CPU_FOREACH(cs) {
-PowerPCCPU *cpu = POWERPC_CPU(cs);
-if (cpu->node_id == i) {
-found = 1;
-break;
-}
-}
-/* memory-less and cpu-less node */
-if (!found) {
-error_report(
-   "Memory-less/cpu-less nodes are not supported (node 
%d)",
- i);
-exit(1);
-}
-}
-}
-
-}
-
 spapr->gpu_numa_id = spapr_numa_initial_nvgpu_numa_id(machine);
 
 /* Init numa_assoc_array */
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 9056644890..222c1b6bbd 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1198,6 +1198,12 @@ target_ulong do_client_architecture_support(PowerPCCPU 
*cpu,
 spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00);
 spapr_ovec_cleanup(ov1_guest);
 
+/*
+ * Check for NUMA affinity conditions now that we know which NUMA
+ * affinity the guest will use.
+ */
+spapr_numa_associativity_check(spapr);
+
 /*
  * Ensure the guest asks for an interrupt mode we support;
  * otherwise terminate the boot.
diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index dce9ce987a..6718c0fdd1 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -193,6 +193,48 @@ static void 
spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
 
 }
 
+static void spapr_numa_FORM1_affinity_check(MachineState *machine)
+{
+int i;
+
+/*
+ * Check we don't have a memory-less/cpu-less NUMA node
+ * Firmware relies on the existing memory/cpu topology to provide the
+ * NUMA topology to the kernel.
+ * And the linux kernel needs to know the NUMA topology at start
+ * to be able to hotplug CPUs later.
+ */
+if (machine->numa_state->num_nodes) {
+for (i = 0; i < machine->numa_state->num_nodes; ++i) {
+/* check for memory-less node */
+if (machine->numa_state->nodes[i].node_mem == 0) {
+CPUState *cs;
+int found = 0;
+/* check for cpu-less node */
+CPU_FOREACH(cs) {
+PowerPCCPU *cpu = POWERPC_CPU(cs);
+if (cpu->node_id == i) {
+found = 1;
+break;
+}
+}
+/* memory-less and cpu-less node */
+if (!found) {
+error_report(
+"Memory-less/cpu-less nodes are not supported with FORM1 NUMA (node %d)", i);
+exit(EXIT_FAILURE);
+}
+}
+}
+}
+
+if (!spapr_numa_is_symmetrical(machine)) {
+error_report(
+"Asymmetrical NUMA topologies aren't supported in the pSeries machine using 
FORM1 NUMA");
+exit(EXIT_FAILURE);
+}
+}
+
 /*
  * Set NUMA machine state data based on FORM1 affinity semantics.
  */
@@ -250,12 +292,6 @@ static void 
spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
   

[PULL 21/44] target/ppc: Convert debug to trace events (exceptions)

2021-09-29 Thread David Gibson
From: Cédric Le Goater 

Signed-off-by: Cédric Le Goater 
Message-Id: <20210920061203.989563-2-...@kaod.org>
Signed-off-by: David Gibson 
---
 target/ppc/excp_helper.c | 38 ++
 target/ppc/trace-events  |  8 
 2 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index d7e32ee107..b7d1767920 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -23,20 +23,14 @@
 #include "internal.h"
 #include "helper_regs.h"
 
+#include "trace.h"
+
 #ifdef CONFIG_TCG
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #endif
 
-/* #define DEBUG_OP */
 /* #define DEBUG_SOFTWARE_TLB */
-/* #define DEBUG_EXCEPTIONS */
-
-#ifdef DEBUG_EXCEPTIONS
-#  define LOG_EXCP(...) qemu_log(__VA_ARGS__)
-#else
-#  define LOG_EXCP(...) do { } while (0)
-#endif
 
 /*/
 /* Exception processing */
@@ -414,12 +408,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
excp_model, int excp)
 }
 break;
 case POWERPC_EXCP_DSI:   /* Data storage exception   */
-LOG_EXCP("DSI exception: DSISR=" TARGET_FMT_lx" DAR=" TARGET_FMT_lx
- "\n", env->spr[SPR_DSISR], env->spr[SPR_DAR]);
+trace_ppc_excp_dsi(env->spr[SPR_DSISR], env->spr[SPR_DAR]);
 break;
 case POWERPC_EXCP_ISI:   /* Instruction storage exception*/
-LOG_EXCP("ISI exception: msr=" TARGET_FMT_lx ", nip=" TARGET_FMT_lx
- "\n", msr, env->nip);
+trace_ppc_excp_isi(msr, env->nip);
 msr |= env->error_code;
 break;
 case POWERPC_EXCP_EXTERNAL:  /* External input   */
@@ -474,7 +466,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
excp_model, int excp)
 switch (env->error_code & ~0xF) {
 case POWERPC_EXCP_FP:
 if ((msr_fe0 == 0 && msr_fe1 == 0) || msr_fp == 0) {
-LOG_EXCP("Ignore floating point exception\n");
+trace_ppc_excp_fp_ignore();
 cs->exception_index = POWERPC_EXCP_NONE;
 env->error_code = 0;
 return;
@@ -489,7 +481,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
excp_model, int excp)
 env->spr[SPR_BOOKE_ESR] = ESR_FP;
 break;
 case POWERPC_EXCP_INVAL:
-LOG_EXCP("Invalid instruction at " TARGET_FMT_lx "\n", env->nip);
+trace_ppc_excp_inval(env->nip);
 msr |= 0x0008;
 env->spr[SPR_BOOKE_ESR] = ESR_PIL;
 break;
@@ -547,10 +539,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
excp_model, int excp)
 break;
 case POWERPC_EXCP_FIT:   /* Fixed-interval timer interrupt   */
 /* FIT on 4xx */
-LOG_EXCP("FIT exception\n");
+trace_ppc_excp_print("FIT");
 break;
 case POWERPC_EXCP_WDT:   /* Watchdog timer interrupt */
-LOG_EXCP("WDT exception\n");
+trace_ppc_excp_print("WDT");
 switch (excp_model) {
 case POWERPC_EXCP_BOOKE:
 srr0 = SPR_BOOKE_CSRR0;
@@ -657,7 +649,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
excp_model, int excp)
 #endif
 break;
 case POWERPC_EXCP_PIT:   /* Programmable interval timer interrupt*/
-LOG_EXCP("PIT exception\n");
+trace_ppc_excp_print("PIT");
 break;
 case POWERPC_EXCP_IO:/* IO error exception   */
 /* XXX: TODO */
@@ -1115,14 +1107,6 @@ bool ppc_cpu_exec_interrupt(CPUState *cs, int 
interrupt_request)
 
 #endif /* !CONFIG_USER_ONLY */
 
-#if defined(DEBUG_OP)
-static void cpu_dump_rfi(target_ulong RA, target_ulong msr)
-{
-qemu_log("Return from exception at " TARGET_FMT_lx " with flags "
- TARGET_FMT_lx "\n", RA, msr);
-}
-#endif
-
 /*/
 /* Exceptions processing helpers */
 
@@ -1221,9 +1205,7 @@ static inline void do_rfi(CPUPPCState *env, target_ulong 
nip, target_ulong msr)
 /* XXX: beware: this is false if VLE is supported */
 env->nip = nip & ~((target_ulong)0x0003);
 hreg_store_msr(env, msr, 1);
-#if defined(DEBUG_OP)
-cpu_dump_rfi(env->nip, env->msr);
-#endif
+trace_ppc_excp_rfi(env->nip, env->msr);
 /*
  * No need to raise an exception here, as rfi is always the last
  * insn of a TB
diff --git a/target/ppc/trace-events b/target/ppc/trace-events
index c88cfccf8d..53b107f56e 100644
--- a/target/ppc/trace-events
+++ b/target/ppc/trace-events
@@ -28,3 +28,11 @@ kvm_handle_epr(void) "handle epr"
 kvm_handle_watchdog_expiry(void) "handle watchdog expiry"
 kvm_handle_debug_exception(void) "handle debug exception"
 kvm_handle_nmi_exception(void) "handle NMI exception"
+
+# excp_helper.c
+ppc_excp_rfi(uint64_t nip, ui

[PULL 27/44] spapr_numa.c: parametrize FORM1 macros

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

The next preliminary step to introduce NUMA FORM2 affinity is to make
the existing code independent of FORM1 macros and values, i.e.
MAX_DISTANCE_REF_POINTS, NUMA_ASSOC_SIZE and VCPU_ASSOC_SIZE. This patch
accomplishes that by doing the following:

- move the NUMA related macros from spapr.h to spapr_numa.c where they
are used. spapr.h gets instead a 'NUMA_NODES_MAX_NUM' macro that is used
to refer to the maximum number of NUMA nodes, including GPU nodes, that
the machine can support;

- MAX_DISTANCE_REF_POINTS and NUMA_ASSOC_SIZE are renamed to
FORM1_DIST_REF_POINTS and FORM1_NUMA_ASSOC_SIZE. These FORM1 specific
macros are used in FORM1 init functions;

- code that uses MAX_DISTANCE_REF_POINTS now retrieves the
max_dist_ref_points value using get_max_dist_ref_points().
NUMA_ASSOC_SIZE is replaced by get_numa_assoc_size() and VCPU_ASSOC_SIZE
is replaced by get_vcpu_assoc_size(). These functions are used by the
generic device tree functions and h_home_node_associativity() and will
allow them to switch between FORM1 and FORM2 without changing their core
logic.

Reviewed-by: Greg Kurz 
Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210920174947.556324-4-danielhb...@gmail.com>
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_numa.c| 74 ++
 include/hw/ppc/spapr.h | 28 
 2 files changed, 67 insertions(+), 35 deletions(-)

diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index bf520d42b2..08e2d6aed8 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -19,6 +19,33 @@
 /* Moved from hw/ppc/spapr_pci_nvlink2.c */
 #define SPAPR_GPU_NUMA_ID   (cpu_to_be32(1))
 
+/*
+ * Retrieves max_dist_ref_points of the current NUMA affinity.
+ */
+static int get_max_dist_ref_points(SpaprMachineState *spapr)
+{
+return FORM1_DIST_REF_POINTS;
+}
+
+/*
+ * Retrieves numa_assoc_size of the current NUMA affinity.
+ */
+static int get_numa_assoc_size(SpaprMachineState *spapr)
+{
+return FORM1_NUMA_ASSOC_SIZE;
+}
+
+/*
+ * Retrieves vcpu_assoc_size of the current NUMA affinity.
+ *
+ * vcpu_assoc_size is the size of ibm,associativity array
+ * for CPUs, which has an extra element (vcpu_id) in the end.
+ */
+static int get_vcpu_assoc_size(SpaprMachineState *spapr)
+{
+return get_numa_assoc_size(spapr) + 1;
+}
+
 static bool spapr_numa_is_symmetrical(MachineState *ms)
 {
 int src, dst;
@@ -96,7 +123,7 @@ static void 
spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
  * considered a match with associativity domains of node 0.
  */
 for (i = 1; i < nb_numa_nodes; i++) {
-for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) {
+for (j = 1; j < FORM1_DIST_REF_POINTS; j++) {
 spapr->numa_assoc_array[i][j] = cpu_to_be32(i);
 }
 }
@@ -134,7 +161,7 @@ static void 
spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
  *
  * The Linux kernel will assume that the distance between src and
  * dst, in this case of no match, is 10 (local distance) doubled
- * for each NUMA it didn't match. We have MAX_DISTANCE_REF_POINTS
+ * for each NUMA it didn't match. We have FORM1_DIST_REF_POINTS
  * levels (4), so this gives us 10*2*2*2*2 = 160.
  *
  * This logic can be seen in the Linux kernel source code, as of
@@ -169,7 +196,7 @@ static void 
spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
 
 /*
  * For all associativity arrays: first position is the size,
- * position MAX_DISTANCE_REF_POINTS is always the numa_id,
+ * position FORM1_DIST_REF_POINTS is always the numa_id,
  * represented by the index 'i'.
  *
  * This will break on sparse NUMA setups, when/if QEMU starts
@@ -177,8 +204,8 @@ static void 
spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
  * 'i' will be a valid node_id set by the user.
  */
 for (i = 0; i < nb_numa_nodes; i++) {
-spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
-spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
+spapr->numa_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
+spapr->numa_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
 }
 
 /*
@@ -192,15 +219,15 @@ static void 
spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
 max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM;
 
 for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) {
-spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
+spapr->numa_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
 
-for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) {
+for (j = 1; j < FORM1_DIST_REF_POINTS; j++) {
 uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ?
  SPAPR_GPU_NUMA_ID : cpu_to_be32(i);
 spapr->numa_assoc_array[i][j] = 

[PULL 39/44] MAINTAINERS: Remove machine specific files from ppc TCG CPUs entry

2021-09-29 Thread David Gibson
Currently the PowerPC TCG CPUs entry in MAINTAINERS lists all of hw/ppc/
and include/hw/ppc.  Nearly all the files in those places are related to
specific ppc machine types, rather than to the actual CPUs however.  Those
machine types list their own files separately, often overlapping with this.
For greater clarity, remove these misleading entries from the TCG CPUs
stanza, leaving just hw/ppc/ppc.c and hw/ppc/ppc_booke.c which are the only
ones common to a wide range of PPC TCG cpus each.

Signed-off-by: David Gibson 
---
 MAINTAINERS | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index bf1fc5b21e..b57a4c733b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -271,8 +271,9 @@ M: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Maintained
 F: target/ppc/
-F: hw/ppc/
-F: include/hw/ppc/
+F: hw/ppc/ppc.c
+F: hw/ppc/ppc_booke.c
+F: include/hw/ppc/ppc.h
 F: disas/ppc.c
 
 RISC-V TCG CPUs
-- 
2.31.1




[PULL 26/44] spapr_numa.c: scrap 'legacy_numa' concept

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

When first introduced, 'legacy_numa' was a way to refer to guests that
either wouldn't be affected by associativity domain calculations, namely
the ones with only 1 NUMA node, and pre 5.2 guests that shouldn't be
affected by it because it would be a userspace change. Calling these
cases 'legacy_numa' was a convenient way to label these cases.

We're about to introduce a new NUMA affinity, FORM2, and this concept
of 'legacy_numa' is now a bit misleading because, although it is called
'legacy', it is in fact a FORM1 exclusive constraint.

This patch removes spapr_machine_using_legacy_numa() and open-codes the
conditions in each caller. While we're at it, move the chunk inside
spapr_numa_FORM1_affinity_init() that sets all numa_assoc_array domains
with 'node_id' to spapr_numa_define_FORM1_domains(). This chunk was
being executed if !pre_5_2_numa_associativity and num_nodes > 1, the
same conditions in which spapr_numa_define_FORM1_domains() is called
shortly after.

Reviewed-by: Greg Kurz 
Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210920174947.556324-3-danielhb...@gmail.com>
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_numa.c | 47 +++--
 1 file changed, 20 insertions(+), 27 deletions(-)

diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index 786def7c73..bf520d42b2 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -19,15 +19,6 @@
 /* Moved from hw/ppc/spapr_pci_nvlink2.c */
 #define SPAPR_GPU_NUMA_ID   (cpu_to_be32(1))
 
-static bool spapr_machine_using_legacy_numa(SpaprMachineState *spapr)
-{
-MachineState *machine = MACHINE(spapr);
-SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
-
-return smc->pre_5_2_numa_associativity ||
-   machine->numa_state->num_nodes <= 1;
-}
-
 static bool spapr_numa_is_symmetrical(MachineState *ms)
 {
 int src, dst;
@@ -97,7 +88,18 @@ static void 
spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
 MachineState *ms = MACHINE(spapr);
 NodeInfo *numa_info = ms->numa_state->nodes;
 int nb_numa_nodes = ms->numa_state->num_nodes;
-int src, dst, i;
+int src, dst, i, j;
+
+/*
+ * Fill all associativity domains of non-zero NUMA nodes with
+ * node_id. This is required because the default value (0) is
+ * considered a match with associativity domains of node 0.
+ */
+for (i = 1; i < nb_numa_nodes; i++) {
+for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) {
+spapr->numa_assoc_array[i][j] = cpu_to_be32(i);
+}
+}
 
 for (src = 0; src < nb_numa_nodes; src++) {
 for (dst = src; dst < nb_numa_nodes; dst++) {
@@ -164,7 +166,6 @@ static void 
spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 int nb_numa_nodes = machine->numa_state->num_nodes;
 int i, j, max_nodes_with_gpus;
-bool using_legacy_numa = spapr_machine_using_legacy_numa(spapr);
 
 /*
  * For all associativity arrays: first position is the size,
@@ -178,17 +179,6 @@ static void 
spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
 for (i = 0; i < nb_numa_nodes; i++) {
 spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
 spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
-
-/*
- * Fill all associativity domains of non-zero NUMA nodes with
- * node_id. This is required because the default value (0) is
- * considered a match with associativity domains of node 0.
- */
-if (!using_legacy_numa && i != 0) {
-for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) {
-spapr->numa_assoc_array[i][j] = cpu_to_be32(i);
-}
-}
 }
 
 /*
@@ -214,11 +204,13 @@ static void 
spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
 }
 
 /*
- * Legacy NUMA guests (pseries-5.1 and older, or guests with only
- * 1 NUMA node) will not benefit from anything we're going to do
- * after this point.
+ * Guests pseries-5.1 and older uses zeroed associativity domains,
+ * i.e. no domain definition based on NUMA distance input.
+ *
+ * Same thing with guests that have only one NUMA node.
  */
-if (using_legacy_numa) {
+if (smc->pre_5_2_numa_associativity ||
+machine->numa_state->num_nodes <= 1) {
 return;
 }
 
@@ -334,7 +326,8 @@ static void 
spapr_numa_FORM1_write_rtas_dt(SpaprMachineState *spapr,
 cpu_to_be32(maxdomain)
 };
 
-if (spapr_machine_using_legacy_numa(spapr)) {
+if (smc->pre_5_2_numa_associativity ||
+ms->numa_state->num_nodes <= 1) {
 uint32_t legacy_refpoints[] = {
 cpu_to_be32(0x4),
 cpu_to_be32(0x4),
-- 
2.31.1




[PULL 19/44] spapr: use DEVICE_UNPLUG_GUEST_ERROR to report unplug errors

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
error path, signalling that the hotunplug process wasn't successful.
This allows us to send a DEVICE_UNPLUG_GUEST_ERROR in drc_unisolate_logical()
to signal this error to the management layer.

We also have another error path in spapr_memory_unplug_rollback() for
configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
in the hotunplug error path, but it will reconfigure them. Let's send
the DEVICE_UNPLUG_GUEST_ERROR event in that code path as well to cover the
case of older kernels.

Acked-by: David Gibson 
Reviewed-by: Greg Kurz 
Reviewed-by: Markus Armbruster 
Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210907004755.424931-7-danielhb...@gmail.com>
Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c | 10 +-
 hw/ppc/spapr_drc.c |  9 +
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index ac11c8a728..270106975b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -29,6 +29,7 @@
 #include "qemu/datadir.h"
 #include "qapi/error.h"
 #include "qapi/qapi-events-machine.h"
+#include "qapi/qapi-events-qdev.h"
 #include "qapi/visitor.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/hostmem.h"
@@ -3686,11 +3687,18 @@ void spapr_memory_unplug_rollback(SpaprMachineState 
*spapr, DeviceState *dev)
 
 /*
  * Tell QAPI that something happened and the memory
- * hotunplug wasn't successful.
+ * hotunplug wasn't successful. Keep sending
+ * MEM_UNPLUG_ERROR even while sending
+ * DEVICE_UNPLUG_GUEST_ERROR until the deprecation of
+ * MEM_UNPLUG_ERROR is due.
  */
 qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
  "for device %s", dev->id);
+
 qapi_event_send_mem_unplug_error(dev->id ? : "", qapi_error);
+
+qapi_event_send_device_unplug_guest_error(!!dev->id, dev->id,
+  dev->canonical_path);
 }
 
 /* Callback to be called during DRC release. */
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index a4d9496f76..f8ac0a10df 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -17,6 +17,8 @@
 #include "hw/ppc/spapr_drc.h"
 #include "qom/object.h"
 #include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qapi/qapi-events-qdev.h"
 #include "qapi/visitor.h"
 #include "qemu/error-report.h"
 #include "hw/ppc/spapr.h" /* for RTAS return codes */
@@ -173,10 +175,9 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
  "for device %s", drc->dev->id);
 }
 
-/*
- * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
- * it is implemented.
- */
+qapi_event_send_device_unplug_guest_error(!!drc->dev->id,
+  drc->dev->id,
+  
drc->dev->canonical_path);
 }
 
 return RTAS_OUT_SUCCESS; /* Nothing to do */
-- 
2.31.1




[PULL 15/44] spapr.c: handle dev->id in spapr_memory_unplug_rollback()

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

As done in hw/acpi/memory_hotplug.c, pass an empty string if dev->id
is NULL to qapi_event_send_mem_unplug_error() to avoid relying on
a behavior that can be changed in the future.

Suggested-by: Markus Armbruster 
Reviewed-by: Greg Kurz 
Reviewed-by: David Gibson 
Reviewed-by: Markus Armbruster 
Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210907004755.424931-3-danielhb...@gmail.com>
Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index d39fd4e644..ac11c8a728 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -3690,7 +3690,7 @@ void spapr_memory_unplug_rollback(SpaprMachineState 
*spapr, DeviceState *dev)
  */
 qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
  "for device %s", dev->id);
-qapi_event_send_mem_unplug_error(dev->id, qapi_error);
+qapi_event_send_mem_unplug_error(dev->id ? : "", qapi_error);
 }
 
 /* Callback to be called during DRC release. */
-- 
2.31.1




[PULL 31/44] spapr_numa.c: handle auto NUMA node with no distance info

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

numa_complete_configuration() in hw/core/numa.c always adds a NUMA node
for the pSeries machine if none was specified, but without node distance
information for the single node created.

NUMA FORM1 affinity code didn't rely on numa_state information to do its
job, but FORM2 does. As is now, this is the result of a pSeries guest
with NUMA FORM2 affinity when no NUMA node is specified:

$ numactl -H
available: 1 nodes (0)
node 0 cpus: 0
node 0 size: 16222 MB
node 0 free: 15681 MB
No distance information available.

This can be amended in spapr_numa_FORM2_write_rtas_tables(). We're
enforcing that the local distance (the distance from a node to itself) is
always 10. This allows for the proper creation of the NUMA distance tables,
fixing the output of 'numactl -H' in the guest:

$ numactl -H
available: 1 nodes (0)
node 0 cpus: 0
node 0 size: 16222 MB
node 0 free: 15685 MB
node distances:
node   0
  0:  10

CC: Igor Mammedov 
Reviewed-by: Greg Kurz 
Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210920174947.556324-8-danielhb...@gmail.com>
Acked-by: Igor Mammedov 
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_numa.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index 13db321997..58d5dc7084 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -539,6 +539,17 @@ static void 
spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
 
 for (src = 0; src < nb_numa_nodes; src++) {
 for (dst = 0; dst < nb_numa_nodes; dst++) {
+/*
+ * We need to be explicit with the local distance
+ * value to cover the case where the user didn't added any
+ * NUMA nodes, but QEMU adds the default NUMA node without
+ * adding the numa_info to retrieve distance info from.
+ */
+if (src == dst) {
+node_distances[i++] = 10;
+continue;
+}
+
 node_distances[i++] = numa_info[src].distance[dst];
 }
 }
-- 
2.31.1




[PULL 23/44] target/ppc: add LPCR[HR] to DisasContext and hflags

2021-09-29 Thread David Gibson
From: Matheus Ferst 

Add a Host Radix field (hr) in DisasContext with LPCR[HR] value to allow
us to decide between Radix and HPT while validating instruction
arguments. Note that PowerISA v3.1 does not require LPCR[HR] and PATE.HR
to match if the thread is in ultravisor/hypervisor real addressing mode,
so ctx->hr may be invalid if ctx->hv and ctx->dr are set.

Signed-off-by: Matheus Ferst 
Reviewed-by: Daniel Henrique Barboza 
Message-Id: <20210917114751.206845-2-matheus.fe...@eldorado.org.br>
Reviewed-by: Daniel Henrique Barboza 
Signed-off-by: David Gibson 
---
 target/ppc/cpu.h | 1 +
 target/ppc/helper_regs.c | 3 +++
 target/ppc/translate.c   | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 01d3773bc7..baa4e7c34d 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -600,6 +600,7 @@ enum {
 HFLAGS_64 = 2,   /* computed from MSR_CE and MSR_SF */
 HFLAGS_GTSE = 3, /* computed from SPR_LPCR[GTSE] */
 HFLAGS_DR = 4,   /* MSR_DR */
+HFLAGS_HR = 5,   /* computed from SPR_LPCR[HR] */
 HFLAGS_SPE = 6,  /* from MSR_SPE if cpu has SPE; avoid overlap w/ MSR_VR */
 HFLAGS_TM = 8,   /* computed from MSR_TM */
 HFLAGS_BE = 9,   /* MSR_BE -- from elsewhere on embedded ppc */
diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
index 405450d863..1bfb480ecf 100644
--- a/target/ppc/helper_regs.c
+++ b/target/ppc/helper_regs.c
@@ -106,6 +106,9 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env)
 if (env->spr[SPR_LPCR] & LPCR_GTSE) {
 hflags |= 1 << HFLAGS_GTSE;
 }
+if (env->spr[SPR_LPCR] & LPCR_HR) {
+hflags |= 1 << HFLAGS_HR;
+}
 
 #ifndef CONFIG_USER_ONLY
 if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) {
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 5d8b06bd80..9af1624ad2 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -175,6 +175,7 @@ struct DisasContext {
 bool spe_enabled;
 bool tm_enabled;
 bool gtse;
+bool hr;
 ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
 int singlestep_enabled;
 uint32_t flags;
@@ -8539,6 +8540,7 @@ static void ppc_tr_init_disas_context(DisasContextBase 
*dcbase, CPUState *cs)
 ctx->vsx_enabled = (hflags >> HFLAGS_VSX) & 1;
 ctx->tm_enabled = (hflags >> HFLAGS_TM) & 1;
 ctx->gtse = (hflags >> HFLAGS_GTSE) & 1;
+ctx->hr = (hflags >> HFLAGS_HR) & 1;
 
 ctx->singlestep_enabled = 0;
 if ((hflags >> HFLAGS_SE) & 1) {
-- 
2.31.1




[PULL 14/44] memory_hotplug.c: handle dev->id = NULL in acpi_memory_hotplug_write()

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

qapi_event_send_mem_unplug_error() deals with @device being NULL by
replacing it with an empty string ("") when emitting the event. Aside
from the fact that this behavior (qapi visitor mapping NULL pointer to
"") can be patched/changed someday, there's also the lack of utility
that the event brings to listeners, e.g. "a memory unplug error happened
somewhere".

In theory we should just avoid emitting this event at all if dev->id is
NULL, but this would be an incompatible change to existing guests.
Instead, let's make the aforementioned behavior explicit: if dev->id is
NULL, pass an empty string to qapi_event_send_mem_unplug_error().

Suggested-by: Markus Armbruster 
Reviewed-by: Igor Mammedov 
Reviewed-by: Greg Kurz 
Reviewed-by: David Gibson 
Reviewed-by: Markus Armbruster 
Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210907004755.424931-2-danielhb...@gmail.com>
Signed-off-by: David Gibson 
---
 hw/acpi/memory_hotplug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c
index af37889423..6a71de408b 100644
--- a/hw/acpi/memory_hotplug.c
+++ b/hw/acpi/memory_hotplug.c
@@ -178,7 +178,7 @@ static void acpi_memory_hotplug_write(void *opaque, hwaddr 
addr, uint64_t data,
 hotplug_handler_unplug(hotplug_ctrl, dev, &local_err);
 if (local_err) {
 trace_mhp_acpi_pc_dimm_delete_failed(mem_st->selector);
-qapi_event_send_mem_unplug_error(dev->id,
+qapi_event_send_mem_unplug_error(dev->id ? : "",
  error_get_pretty(local_err));
 error_free(local_err);
 break;
-- 
2.31.1




[PULL 17/44] qapi/qdev.json: fix DEVICE_DELETED parameters doc

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

Clarify that @device is optional and that 'path' is the device
path from QOM.

This change follows Markus' suggestion verbatim, provided in full
context here:

https://lists.gnu.org/archive/html/qemu-devel/2021-07/msg01891.html

Suggested-by: Markus Armbruster 
Reviewed-by: Greg Kurz 
Reviewed-by: Markus Armbruster 
Reviewed-by: David Gibson 
Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210907004755.424931-5-danielhb...@gmail.com>
Signed-off-by: David Gibson 
---
 qapi/qdev.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qapi/qdev.json b/qapi/qdev.json
index b83178220b..0e9cb2ae88 100644
--- a/qapi/qdev.json
+++ b/qapi/qdev.json
@@ -108,9 +108,9 @@
 # At this point, it's safe to reuse the specified device ID. Device removal can
 # be initiated by the guest or by HMP/QMP commands.
 #
-# @device: device name
+# @device: the device's ID if it has one
 #
-# @path: device path
+# @path: the device's QOM path
 #
 # Since: 1.5
 #
-- 
2.31.1




[PULL 08/44] ppc/xive: Export priority_to_ipb() helper

2021-09-29 Thread David Gibson
From: Cédric Le Goater 

Signed-off-by: Cédric Le Goater 
Message-Id: <20210901094153.227671-7-...@kaod.org>
Signed-off-by: David Gibson 
---
 hw/intc/xive.c| 21 ++---
 include/hw/ppc/xive.h | 11 +++
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index b817ee8e37..b0c4f76b1d 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -27,17 +27,6 @@
  * XIVE Thread Interrupt Management context
  */
 
-/*
- * Convert a priority number to an Interrupt Pending Buffer (IPB)
- * register, which indicates a pending interrupt at the priority
- * corresponding to the bit number
- */
-static uint8_t priority_to_ipb(uint8_t priority)
-{
-return priority > XIVE_PRIORITY_MAX ?
-0 : 1 << (XIVE_PRIORITY_MAX - priority);
-}
-
 /*
  * Convert an Interrupt Pending Buffer (IPB) register to a Pending
  * Interrupt Priority Register (PIPR), which contains the priority of
@@ -89,7 +78,7 @@ static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
 regs[TM_CPPR] = cppr;
 
 /* Reset the pending buffer bit */
-regs[TM_IPB] &= ~priority_to_ipb(cppr);
+regs[TM_IPB] &= ~xive_priority_to_ipb(cppr);
 regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
 
 /* Drop Exception bit */
@@ -353,7 +342,7 @@ static void xive_tm_set_os_cppr(XivePresenter *xptr, 
XiveTCTX *tctx,
 static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx,
hwaddr offset, uint64_t value, unsigned 
size)
 {
-xive_tctx_ipb_update(tctx, TM_QW1_OS, priority_to_ipb(value & 0xff));
+xive_tctx_ipb_update(tctx, TM_QW1_OS, xive_priority_to_ipb(value & 0xff));
 }
 
 static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk,
@@ -1535,7 +1524,8 @@ bool xive_presenter_notify(XiveFabric *xfb, uint8_t 
format,
 /* handle CPU exception delivery */
 if (count) {
 trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring);
-xive_tctx_ipb_update(match.tctx, match.ring, 
priority_to_ipb(priority));
+xive_tctx_ipb_update(match.tctx, match.ring,
+ xive_priority_to_ipb(priority));
 }
 
 return !!count;
@@ -1682,7 +1672,8 @@ static void xive_router_end_notify(XiveRouter *xrtr, 
uint8_t end_blk,
  * use. The presenter will resend the interrupt when the vCPU
  * is dispatched again on a HW thread.
  */
-ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) | priority_to_ipb(priority);
+ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) |
+xive_priority_to_ipb(priority);
 nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, ipb);
 xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
 
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index db76411654..29b130eaea 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -458,6 +458,17 @@ struct XiveENDSource {
  */
 #define XIVE_PRIORITY_MAX  7
 
+/*
+ * Convert a priority number to an Interrupt Pending Buffer (IPB)
+ * register, which indicates a pending interrupt at the priority
+ * corresponding to the bit number
+ */
+static inline uint8_t xive_priority_to_ipb(uint8_t priority)
+{
+return priority > XIVE_PRIORITY_MAX ?
+0 : 1 << (XIVE_PRIORITY_MAX - priority);
+}
+
 /*
  * XIVE Thread Interrupt Management Aera (TIMA)
  *
-- 
2.31.1




[PULL 16/44] spapr_drc.c: do not error_report() when drc->dev->id == NULL

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

The error_report() call in drc_unisolate_logical() is not considering
that drc->dev->id can be NULL, and the underlying functions error_report()
calls to do its job (vprintf(), g_strdup_printf() ...) have undefined
behavior when trying to handle "%s" with NULL arguments.

Besides, there is no utility in reporting that an unknown device was
rejected by the guest.

Acked-by: David Gibson 
Reviewed-by: Greg Kurz 
Reviewed-by: Markus Armbruster 
Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210907004755.424931-4-danielhb...@gmail.com>
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_drc.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index a2f2634601..a4d9496f76 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -167,8 +167,11 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
 }
 
 drc->unplug_requested = false;
-error_report("Device hotunplug rejected by the guest "
- "for device %s", drc->dev->id);
+
+if (drc->dev->id) {
+error_report("Device hotunplug rejected by the guest "
+ "for device %s", drc->dev->id);
+}
 
 /*
  * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
-- 
2.31.1




[PULL 24/44] target/ppc: Check privilege level based on PSR and LPCR[HR] in tlbie[l]

2021-09-29 Thread David Gibson
From: Matheus Ferst 

PowerISA v3.0B made tlbie[l] hypervisor privileged when PSR=0 and HR=1.
To allow the check at translation time, we'll use the HR bit of LPCR to
check the MMU mode instead of the PATE.HR.

Signed-off-by: Matheus Ferst 
Message-Id: <20210917114751.206845-3-matheus.fe...@eldorado.org.br>
Reviewed-by: Daniel Henrique Barboza 
Signed-off-by: David Gibson 
---
 target/ppc/translate.c | 26 +-
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 9af1624ad2..b985e9e55b 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -5517,7 +5517,15 @@ static void gen_tlbiel(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
 GEN_PRIV;
 #else
-CHK_SV;
+bool psr = (ctx->opcode >> 17) & 0x1;
+
+if (ctx->pr || (!ctx->hv && !psr && ctx->hr)) {
+/*
+ * tlbiel is privileged except when PSR=0 and HR=1, making it
+ * hypervisor privileged.
+ */
+GEN_PRIV;
+}
 
 gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
 #endif /* defined(CONFIG_USER_ONLY) */
@@ -5529,12 +5537,20 @@ static void gen_tlbie(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
 GEN_PRIV;
 #else
+bool psr = (ctx->opcode >> 17) & 0x1;
 TCGv_i32 t1;
 
-if (ctx->gtse) {
-CHK_SV; /* If gtse is set then tlbie is supervisor privileged */
-} else {
-CHK_HV; /* Else hypervisor privileged */
+if (ctx->pr) {
+/* tlbie is privileged... */
+GEN_PRIV;
+} else if (!ctx->hv) {
+if (!ctx->gtse || (!psr && ctx->hr)) {
+/*
+ * ... except when GTSE=0 or when PSR=0 and HR=1, making it
+ * hypervisor privileged.
+ */
+GEN_PRIV;
+}
 }
 
 if (NARROW_MODE(ctx)) {
-- 
2.31.1




[PULL 06/44] ppc/pnv: Add a comment on the "primary-topology-index" property

2021-09-29 Thread David Gibson
From: Cédric Le Goater 

On P10, the chip id is calculated from the "Primary topology table
index". See skiboot commits for more information [1].

This information is extracted from the hdata on real systems which
QEMU needs to emulate. Add this property for all machines even if it
is only used on POWER10.

[1] https://github.com/open-power/skiboot/commit/2ce3f083f399
https://github.com/open-power/skiboot/commit/a2d4d7f9e14a

Signed-off-by: Cédric Le Goater 
Message-Id: <20210901094153.227671-4-...@kaod.org>
Signed-off-by: David Gibson 
---
 hw/ppc/pnv_xscom.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c
index faa488e311..9ce018dbc2 100644
--- a/hw/ppc/pnv_xscom.c
+++ b/hw/ppc/pnv_xscom.c
@@ -284,6 +284,10 @@ int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset,
 _FDT(xscom_offset);
 g_free(name);
 _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,chip-id", chip->chip_id)));
+/*
+ * On P10, the xscom bus id has been deprecated and the chip id is
+ * calculated from the "Primary topology table index". See skiboot.
+ */
 _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,primary-topology-index",
chip->chip_id)));
 _FDT((fdt_setprop_cell(fdt, xscom_offset, "#address-cells", 1)));
-- 
2.31.1




[PULL 20/44] memory_hotplug.c: send DEVICE_UNPLUG_GUEST_ERROR in acpi_memory_hotplug_write()

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

MEM_UNPLUG_ERROR is deprecated since the introduction of
DEVICE_UNPLUG_GUEST_ERROR. Keep emitting both while the deprecation of
MEM_UNPLUG_ERROR is pending.

CC: Michael S. Tsirkin 
CC: Igor Mammedov 
Acked-by: Michael S. Tsirkin 
Reviewed-by: Greg Kurz 
Reviewed-by: David Gibson 
Reviewed-by: Igor Mammedov 
Reviewed-by: Markus Armbruster 
Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210907004755.424931-8-danielhb...@gmail.com>
Signed-off-by: David Gibson 
---
 hw/acpi/memory_hotplug.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c
index 6a71de408b..d0fffcf787 100644
--- a/hw/acpi/memory_hotplug.c
+++ b/hw/acpi/memory_hotplug.c
@@ -8,6 +8,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-events-acpi.h"
 #include "qapi/qapi-events-machine.h"
+#include "qapi/qapi-events-qdev.h"
 
 #define MEMORY_SLOTS_NUMBER  "MDNR"
 #define MEMORY_HOTPLUG_IO_REGION "HPMR"
@@ -178,8 +179,16 @@ static void acpi_memory_hotplug_write(void *opaque, hwaddr 
addr, uint64_t data,
 hotplug_handler_unplug(hotplug_ctrl, dev, &local_err);
 if (local_err) {
 trace_mhp_acpi_pc_dimm_delete_failed(mem_st->selector);
+
+/*
+ * Send both MEM_UNPLUG_ERROR and DEVICE_UNPLUG_GUEST_ERROR
+ * while the deprecation of MEM_UNPLUG_ERROR is
+ * pending.
+ */
 qapi_event_send_mem_unplug_error(dev->id ? : "",
  error_get_pretty(local_err));
+qapi_event_send_device_unplug_guest_error(!!dev->id, dev->id,
+  dev->canonical_path);
 error_free(local_err);
 break;
 }
-- 
2.31.1




[PULL 13/44] target/ppc: fix setting of CR flags in bcdcfsq

2021-09-29 Thread David Gibson
From: Luis Pires 

According to the ISA, CR should be set based on the source value, and
not on the packed decimal result.
The way this was implemented would cause GT, LT and EQ to be set
incorrectly when the source value was too large and the 31 least
significant digits of the packed decimal result ended up being all zero.
This would happen for source values of +/-10^31, +/-10^32, etc.

The new implementation fixes this and also skips the result calculation
altogether in case of src overflow.

Signed-off-by: Luis Pires 
Message-Id: <20210823150235.35759-1-luis.pi...@eldorado.org.br>
Reviewed-by: Richard Henderson 
Signed-off-by: David Gibson 
---
 target/ppc/int_helper.c | 61 -
 1 file changed, 48 insertions(+), 13 deletions(-)

diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index c2d3248d1e..f5dac3aa87 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -2480,10 +2480,26 @@ uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, 
uint32_t ps)
 return cr;
 }
 
+/**
+ * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
+ *
+ * Returns:
+ * > 0 if ahi|alo > bhi|blo,
+ * 0 if ahi|alo == bhi|blo,
+ * < 0 if ahi|alo < bhi|blo
+ */
+static inline int ucmp128(uint64_t alo, uint64_t ahi,
+  uint64_t blo, uint64_t bhi)
+{
+return (ahi == bhi) ?
+(alo > blo ? 1 : (alo == blo ? 0 : -1)) :
+(ahi > bhi ? 1 : -1);
+}
+
 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
 {
 int i;
-int cr = 0;
+int cr;
 uint64_t lo_value;
 uint64_t hi_value;
 ppc_avr_t ret = { .u64 = { 0, 0 } };
@@ -2492,28 +2508,47 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, 
uint32_t ps)
 lo_value = -b->VsrSD(1);
 hi_value = ~b->VsrD(0) + !lo_value;
 bcd_put_digit(&ret, 0xD, 0);
+
+cr = CRF_LT;
 } else {
 lo_value = b->VsrD(1);
 hi_value = b->VsrD(0);
 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
-}
 
-if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
-lo_value > 9999999999999999ULL) {
-cr = CRF_SO;
+if (hi_value == 0 && lo_value == 0) {
+cr = CRF_EQ;
+} else {
+cr = CRF_GT;
+}
 }
 
-for (i = 1; i < 16; hi_value /= 10, i++) {
-bcd_put_digit(&ret, hi_value % 10, i);
-}
+/*
+ * Check src limits: abs(src) <= 10^31 - 1
+ *
+ * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
+ */
+if (ucmp128(lo_value, hi_value,
+0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
+cr |= CRF_SO;
 
-for (; i < 32; lo_value /= 10, i++) {
-bcd_put_digit(&ret, lo_value % 10, i);
-}
+/*
+ * According to the ISA, if src wouldn't fit in the destination
+ * register, the result is undefined.
+ * In that case, we leave r unchanged.
+ */
+} else {
+divu128(&lo_value, &hi_value, 1000000000000000ULL);
 
-cr |= bcd_cmp_zero(&ret);
+for (i = 1; i < 16; hi_value /= 10, i++) {
+bcd_put_digit(&ret, hi_value % 10, i);
+}
 
-*r = ret;
+for (; i < 32; lo_value /= 10, i++) {
+bcd_put_digit(&ret, lo_value % 10, i);
+}
+
+*r = ret;
+}
 
 return cr;
 }
-- 
2.31.1




[PULL 22/44] target/ppc: Replace debug messages by asserts for unknown IRQ pins

2021-09-29 Thread David Gibson
From: Cédric Le Goater 

If an unknown pin of the IRQ controller is raised, something is very
wrong in the QEMU model. It is better to abort.

Signed-off-by: Cédric Le Goater 
Message-Id: <20210920061203.989563-3-...@kaod.org>
Signed-off-by: David Gibson 
---
 hw/ppc/ppc.c | 24 ++--
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 7375bf4fa9..a327206a0a 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -165,9 +165,7 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level)
 ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level);
 break;
 default:
-/* Unknown pin - do nothing */
-LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin);
-return;
+g_assert_not_reached();
 }
 if (level)
 env->irq_input_state |= 1 << pin;
@@ -252,9 +250,7 @@ static void ppc970_set_irq(void *opaque, int pin, int level)
 /* XXX: TODO */
 break;
 default:
-/* Unknown pin - do nothing */
-LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin);
-return;
+g_assert_not_reached();
 }
 if (level)
 env->irq_input_state |= 1 << pin;
@@ -287,9 +283,7 @@ static void power7_set_irq(void *opaque, int pin, int level)
 ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
 break;
 default:
-/* Unknown pin - do nothing */
-LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin);
-return;
+g_assert_not_reached();
 }
 }
 
@@ -323,9 +317,7 @@ static void power9_set_irq(void *opaque, int pin, int level)
 ppc_set_irq(cpu, PPC_INTERRUPT_HVIRT, level);
 break;
 default:
-/* Unknown pin - do nothing */
-LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin);
-return;
+g_assert_not_reached();
 }
 }
 
@@ -459,9 +451,7 @@ static void ppc40x_set_irq(void *opaque, int pin, int level)
 ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level);
 break;
 default:
-/* Unknown pin - do nothing */
-LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin);
-return;
+g_assert_not_reached();
 }
 if (level)
 env->irq_input_state |= 1 << pin;
@@ -523,9 +513,7 @@ static void ppce500_set_irq(void *opaque, int pin, int 
level)
 ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level);
 break;
 default:
-/* Unknown pin - do nothing */
-LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin);
-return;
+g_assert_not_reached();
 }
 if (level)
 env->irq_input_state |= 1 << pin;
-- 
2.31.1




[PULL 10/44] ppc/pnv: Rename "id" to "quad-id" in PnvQuad

2021-09-29 Thread David Gibson
From: Cédric Le Goater 

This is to avoid possible conflicts with the "id" property of QOM objects.

Signed-off-by: Cédric Le Goater 
Message-Id: <20210901094153.227671-9-...@kaod.org>
Signed-off-by: David Gibson 
---
 hw/ppc/pnv.c  | 4 ++--
 hw/ppc/pnv_core.c | 4 ++--
 include/hw/ppc/pnv_core.h | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index a62e90b15e..03c86508d2 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1368,10 +1368,10 @@ static void pnv_chip_quad_realize(Pnv9Chip *chip9, 
Error **errp)
sizeof(*eq), TYPE_PNV_QUAD,
&error_fatal, NULL);
 
-object_property_set_int(OBJECT(eq), "id", core_id, &error_fatal);
+object_property_set_int(OBJECT(eq), "quad-id", core_id, &error_fatal);
 qdev_realize(DEVICE(eq), NULL, &error_fatal);
 
-pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->id),
+pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->quad_id),
 &eq->xscom_regs);
 }
 }
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 4de8414df2..19e8eb885f 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -407,13 +407,13 @@ static void pnv_quad_realize(DeviceState *dev, Error 
**errp)
 PnvQuad *eq = PNV_QUAD(dev);
 char name[32];
 
-snprintf(name, sizeof(name), "xscom-quad.%d", eq->id);
+snprintf(name, sizeof(name), "xscom-quad.%d", eq->quad_id);
 pnv_xscom_region_init(&eq->xscom_regs, OBJECT(dev), &pnv_quad_xscom_ops,
   eq, name, PNV9_XSCOM_EQ_SIZE);
 }
 
 static Property pnv_quad_properties[] = {
-DEFINE_PROP_UINT32("id", PnvQuad, id, 0),
+DEFINE_PROP_UINT32("quad-id", PnvQuad, quad_id, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
index 6ecee98a76..c22eab2e1f 100644
--- a/include/hw/ppc/pnv_core.h
+++ b/include/hw/ppc/pnv_core.h
@@ -67,7 +67,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(PnvQuad, PNV_QUAD)
 struct PnvQuad {
 DeviceState parent_obj;
 
-uint32_t id;
+uint32_t quad_id;
 MemoryRegion xscom_regs;
 };
 #endif /* PPC_PNV_CORE_H */
-- 
2.31.1




[PULL 18/44] qapi/qdev.json: add DEVICE_UNPLUG_GUEST_ERROR QAPI event

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

At this moment we only provide one event to report a hotunplug error,
MEM_UNPLUG_ERROR. As of Linux kernel 5.12 and QEMU 6.0.0, the pseries
machine is now able to report unplug errors for other device types, such
as CPUs.

Instead of creating a (device_type)_UNPLUG_ERROR for each new device,
create a generic DEVICE_UNPLUG_GUEST_ERROR event that can be used by all
guest side unplug errors in the future. This event has a similar API as
the existing DEVICE_DELETED event, always providing the QOM path of the
device and dev->id if there's any.

With this new generic event, MEM_UNPLUG_ERROR is now marked as deprecated.

Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
Reviewed-by: Markus Armbruster 
Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210907004755.424931-6-danielhb...@gmail.com>
[dwg: Correct missing ')' in stubs/qdev.c]
Signed-off-by: David Gibson 
---
 docs/about/deprecated.rst | 10 ++
 qapi/machine.json |  7 ++-
 qapi/qdev.json| 27 ++-
 stubs/qdev.c  |  7 +++
 4 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 3c2be84d80..2f7db9a98d 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -238,6 +238,16 @@ The ``I7200`` guest CPU relies on the nanoMIPS ISA, which 
is deprecated
 (the ISA has never been upstreamed to a compiler toolchain). Therefore
 this CPU is also deprecated.
 
+
+QEMU API (QAPI) events
+----------------------
+
+``MEM_UNPLUG_ERROR`` (since 6.2)
+''''''''''''''''''''''''''''''''
+
+Use the more generic event ``DEVICE_UNPLUG_GUEST_ERROR`` instead.
+
+
 System emulator machines
 
 
diff --git a/qapi/machine.json b/qapi/machine.json
index 32d47f4e35..66bc34ed8b 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1305,6 +1305,10 @@
 #
 # @msg: Informative message
 #
+# Features:
+# @deprecated: This event is deprecated. Use @DEVICE_UNPLUG_GUEST_ERROR
+#  instead.
+#
 # Since: 2.4
 #
 # Example:
@@ -1317,7 +1321,8 @@
 #
 ##
 { 'event': 'MEM_UNPLUG_ERROR',
-  'data': { 'device': 'str', 'msg': 'str' } }
+  'data': { 'device': 'str', 'msg': 'str' },
+  'features': ['deprecated'] }
 
 ##
 # @SMPConfiguration:
diff --git a/qapi/qdev.json b/qapi/qdev.json
index 0e9cb2ae88..d75e68908b 100644
--- a/qapi/qdev.json
+++ b/qapi/qdev.json
@@ -84,7 +84,9 @@
 #This command merely requests that the guest begin the hot removal
 #process.  Completion of the device removal process is signaled with a
 #DEVICE_DELETED event. Guest reset will automatically complete removal
-#for all devices.
+#for all devices.  If a guest-side error in the hot removal process is
+#detected, the device will not be removed and a 
DEVICE_UNPLUG_GUEST_ERROR
+#event is sent.  Some errors cannot be detected.
 #
 # Since: 0.14
 #
@@ -124,3 +126,26 @@
 ##
 { 'event': 'DEVICE_DELETED',
   'data': { '*device': 'str', 'path': 'str' } }
+
+##
+# @DEVICE_UNPLUG_GUEST_ERROR:
+#
+# Emitted when a device hot unplug fails due to a guest reported error.
+#
+# @device: the device's ID if it has one
+#
+# @path: the device's QOM path
+#
+# Since: 6.2
+#
+# Example:
+#
+# <- { "event": "DEVICE_UNPLUG_GUEST_ERROR"
+#  "data": { "device": "core1",
+#"path": "/machine/peripheral/core1" },
+#  },
+#  "timestamp": { "seconds": 1615570772, "microseconds": 202844 } }
+#
+##
+{ 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
+  'data': { '*device': 'str', 'path': 'str' } }
diff --git a/stubs/qdev.c b/stubs/qdev.c
index 92e6143134..187659f707 100644
--- a/stubs/qdev.c
+++ b/stubs/qdev.c
@@ -21,3 +21,10 @@ void qapi_event_send_device_deleted(bool has_device,
 {
 /* Nothing to do. */
 }
+
+void qapi_event_send_device_unplug_guest_error(bool has_device,
+   const char *device,
+   const char *path)
+{
+/* Nothing to do. */
+}
-- 
2.31.1




[PULL 09/44] ppc/xive: Export xive_tctx_word2() helper

2021-09-29 Thread David Gibson
From: Cédric Le Goater 

Signed-off-by: Cédric Le Goater 
Message-Id: <20210901094153.227671-8-...@kaod.org>
Signed-off-by: David Gibson 
---
 hw/intc/xive.c| 5 -
 include/hw/ppc/xive.h | 5 +
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index b0c4f76b1d..6c82326ec7 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -141,11 +141,6 @@ void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, 
uint8_t ipb)
 xive_tctx_notify(tctx, ring);
 }
 
-static inline uint32_t xive_tctx_word2(uint8_t *ring)
-{
-return *((uint32_t *) &ring[TM_WORD2]);
-}
-
 /*
  * XIVE Thread Interrupt Management Area (TIMA)
  */
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 29b130eaea..252c58a1d6 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -335,6 +335,11 @@ struct XiveTCTX {
 XivePresenter *xptr;
 };
 
+static inline uint32_t xive_tctx_word2(uint8_t *ring)
+{
+return *((uint32_t *) &ring[TM_WORD2]);
+}
+
 /*
  * XIVE Router
  */
-- 
2.31.1




[PULL 07/44] ppc/pnv: Remove useless variable

2021-09-29 Thread David Gibson
From: Cédric Le Goater 

Signed-off-by: Cédric Le Goater 
Message-Id: <20210901094153.227671-5-...@kaod.org>
Signed-off-by: David Gibson 
---
 hw/ppc/pnv.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 2f5358b70c..a62e90b15e 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -838,8 +838,7 @@ static void pnv_init(MachineState *machine)
 for (i = 0; i < pnv->num_chips; i++) {
 char chip_name[32];
 Object *chip = OBJECT(qdev_new(chip_typename));
-int chip_id = i;
-uint64_t chip_ram_size =  pnv_chip_get_ram_size(pnv, chip_id);
+uint64_t chip_ram_size =  pnv_chip_get_ram_size(pnv, i);
 
 pnv->chips[i] = PNV_CHIP(chip);
 
@@ -850,9 +849,9 @@ static void pnv_init(MachineState *machine)
 &error_fatal);
 chip_ram_start += chip_ram_size;
 
-snprintf(chip_name, sizeof(chip_name), "chip[%d]", chip_id);
+snprintf(chip_name, sizeof(chip_name), "chip[%d]", i);
 object_property_add_child(OBJECT(pnv), chip_name, chip);
-object_property_set_int(chip, "chip-id", chip_id, &error_fatal);
+object_property_set_int(chip, "chip-id", i, &error_fatal);
 object_property_set_int(chip, "nr-cores", machine->smp.cores,
 &error_fatal);
 object_property_set_int(chip, "nr-threads", machine->smp.threads,
-- 
2.31.1




[PULL 12/44] ppc/pnv: Add an assert when calculating the RAM distribution on chips

2021-09-29 Thread David Gibson
From: Cédric Le Goater 

Signed-off-by: Cédric Le Goater 
Message-Id: <20210902130928.528803-3-...@kaod.org>
Reviewed-by: Greg Kurz 
Signed-off-by: David Gibson 
---
 hw/ppc/pnv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 03c86508d2..71e45515f1 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -723,6 +723,8 @@ static uint64_t pnv_chip_get_ram_size(PnvMachineState *pnv, 
int chip_id)
 return QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB);
 }
 
+assert(pnv->num_chips > 1);
+
 ram_per_chip = (machine->ram_size - 1 * GiB) / (pnv->num_chips - 1);
 return chip_id == 0 ? 1 * GiB : QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB);
 }
-- 
2.31.1




[PULL 03/44] host-utils: introduce uabs64()

2021-09-29 Thread David Gibson
From: Luis Pires 

Introduce uabs64(), a function that returns the absolute value of
a 64-bit int as an unsigned value. This avoids the undefined behavior
for common abs implementations, where abs of the most negative value is
undefined.

Signed-off-by: Luis Pires 
Reviewed-by: Richard Henderson 
Reviewed-by: Eduardo Habkost 
Message-Id: <20210910112624.72748-4-luis.pi...@eldorado.org.br>
Signed-off-by: David Gibson 
---
 include/qemu/host-utils.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 753b9fb89f..ca9f3f021b 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -357,6 +357,14 @@ static inline uint64_t revbit64(uint64_t x)
 #endif
 }
 
+/**
+ * Return the absolute value of a 64-bit integer as an unsigned 64-bit value
+ */
+static inline uint64_t uabs64(int64_t v)
+{
+return v < 0 ? -v : v;
+}
+
 /**
  * sadd32_overflow - addition with overflow indication
  * @x, @y: addends
-- 
2.31.1




[PULL 02/44] host-utils: fix missing zero-extension in divs128

2021-09-29 Thread David Gibson
From: Luis Pires 

*plow (lower 64 bits of the dividend) is passed into divs128() as
a signed 64-bit integer. When building an __int128_t from it, it
must be zero-extended, instead of sign-extended.

Suggested-by: Richard Henderson 
Signed-off-by: Luis Pires 
Message-Id: <20210910112624.72748-3-luis.pi...@eldorado.org.br>
Signed-off-by: David Gibson 
---
 include/qemu/host-utils.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 711b221704..753b9fb89f 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -70,7 +70,7 @@ static inline int divs128(int64_t *plow, int64_t *phigh, 
int64_t divisor)
 if (divisor == 0) {
 return 1;
 } else {
-__int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
+__int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
 __int128_t result = dividend / divisor;
 *plow = result;
 *phigh = dividend % divisor;
-- 
2.31.1




[PULL 11/44] docs/system: ppc: Update the URL for OpenPOWER firmware images

2021-09-29 Thread David Gibson
From: Cédric Le Goater 

This also fixes a small skiboot/skiroot typo and removes the links to
the specific POWER8 and POWER9 images since the firmware images can be
used to run all machines.

Signed-off-by: Cédric Le Goater 
Message-Id: <20210902130928.528803-2-...@kaod.org>
Reviewed-by: Greg Kurz 
Signed-off-by: David Gibson 
---
 docs/system/ppc/powernv.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/system/ppc/powernv.rst b/docs/system/ppc/powernv.rst
index 4c4cdea527..86186b7d2c 100644
--- a/docs/system/ppc/powernv.rst
+++ b/docs/system/ppc/powernv.rst
@@ -53,8 +53,7 @@ initramfs ``skiroot``. Source code can be found on GitHub:
 
   https://github.com/open-power.
 
-Prebuilt images of ``skiboot`` and ``skiboot`` are made available on the 
`OpenPOWER `__ 
site. To boot a POWER9 machine, use the `witherspoon 
`__
 images. For POWER8, use
-the `palmetto 
`__
 images.
+Prebuilt images of ``skiboot`` and ``skiroot`` are made available on the 
`OpenPOWER `__ site.
 
 QEMU includes a prebuilt image of ``skiboot`` which is updated when a
 more recent version is required by the models.
-- 
2.31.1




[PULL 25/44] spapr_numa.c: split FORM1 code into helpers

2021-09-29 Thread David Gibson
From: Daniel Henrique Barboza 

The upcoming FORM2 NUMA affinity will support asymmetric NUMA topologies
and doesn't need to be concerned with all the legacy support for older
pseries FORM1 guests.

We're also not going to calculate associativity domains based on numa
distance (via spapr_numa_define_associativity_domains) since the
distances will be written directly into new DT properties.

Let's split FORM1 code into its own functions to allow for easier
insertion of FORM2 logic later on.

Reviewed-by: Greg Kurz 
Signed-off-by: Daniel Henrique Barboza 
Message-Id: <20210920174947.556324-2-danielhb...@gmail.com>
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_numa.c | 35 +--
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index 779f18b994..786def7c73 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -92,7 +92,7 @@ static uint8_t spapr_numa_get_numa_level(uint8_t distance)
 return 0;
 }
 
-static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr)
+static void spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
 {
 MachineState *ms = MACHINE(spapr);
 NodeInfo *numa_info = ms->numa_state->nodes;
@@ -155,8 +155,11 @@ static void 
spapr_numa_define_associativity_domains(SpaprMachineState *spapr)
 
 }
 
-void spapr_numa_associativity_init(SpaprMachineState *spapr,
-   MachineState *machine)
+/*
+ * Set NUMA machine state data based on FORM1 affinity semantics.
+ */
+static void spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
+   MachineState *machine)
 {
 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 int nb_numa_nodes = machine->numa_state->num_nodes;
@@ -225,7 +228,13 @@ void spapr_numa_associativity_init(SpaprMachineState 
*spapr,
 exit(EXIT_FAILURE);
 }
 
-spapr_numa_define_associativity_domains(spapr);
+spapr_numa_define_FORM1_domains(spapr);
+}
+
+void spapr_numa_associativity_init(SpaprMachineState *spapr,
+   MachineState *machine)
+{
+spapr_numa_FORM1_affinity_init(spapr, machine);
 }
 
 void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
@@ -302,12 +311,8 @@ int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState 
*spapr, void *fdt,
 return ret;
 }
 
-/*
- * Helper that writes ibm,associativity-reference-points and
- * max-associativity-domains in the RTAS pointed by @rtas
- * in the DT @fdt.
- */
-void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
+static void spapr_numa_FORM1_write_rtas_dt(SpaprMachineState *spapr,
+   void *fdt, int rtas)
 {
 MachineState *ms = MACHINE(spapr);
 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
@@ -365,6 +370,16 @@ void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, 
void *fdt, int rtas)
  maxdomains, sizeof(maxdomains)));
 }
 
+/*
+ * Helper that writes ibm,associativity-reference-points and
+ * max-associativity-domains in the RTAS pointed by @rtas
+ * in the DT @fdt.
+ */
+void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
+{
+spapr_numa_FORM1_write_rtas_dt(spapr, fdt, rtas);
+}
+
 static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
   SpaprMachineState *spapr,
   target_ulong opcode,
-- 
2.31.1




[PULL 00/44] ppc-for-6.2 queue 20210930

2021-09-29 Thread David Gibson
The following changes since commit 6b54a31bf7b403672a798b6443b1930ae6c74dea:

  Merge remote-tracking branch 'remotes/jsnow-gitlab/tags/python-pull-request' 
into staging (2021-09-28 13:07:32 +0100)

are available in the Git repository at:

  https://gitlab.com/dgibson/qemu.git tags/ppc-for-6.2-20210930

for you to fetch changes up to 85d887be82905aa81b5d3d6c483ff0fa9958382b:

  MAINTAINERS: Demote sPAPR from "Supported" to "Maintained" (2021-09-30 
12:26:06 +1000)


ppc patch queue for 2021-09-30

Here's the next batch of ppc related patches for qemu-6.2.  Highlights
are:
 * Fixes for several TCG math instructions from the El Dorado Institute
 * A number of improvements to the powernv machine type
 * Support for a new DEVICE_UNPLUG_GUEST_ERROR QAPI event from Daniel
   Barboza
 * Support for the new FORM2 PAPR NUMA representation.  This allows
   more specific NUMA distances, as well as asymmetric configurations
 * Fix for 64-bit decrementer (used on MicroWatt CPUs)
 * Assorted fixes and cleanups
 * A number of updates to MAINTAINERS

Note that the DEVICE_UNPLUG_GUEST_ERROR stuff includes changes to
files outside my normal area, but has suitable Acks.

The MAINTAINERS updates are mostly about marking minor platforms
unmaintained / orphaned, and moving some pieces away from myself and
Greg.  As we move onto other projects, we're going to need to drop
more of the ppc maintainership, though we're hoping we can avoid too
abrupt a change.


Bin Meng (3):
  hw/intc: openpic: Correct the reset value of IPIDR for FSL chipset
  hw/intc: openpic: Drop Raven related codes
  hw/intc: openpic: Clean up the styles

Cédric Le Goater (13):
  ppc/spapr: Add a POWER10 DD2 CPU
  ppc/pnv: Add a comment on the "primary-topology-index" property
  ppc/pnv: Remove useless variable
  ppc/xive: Export priority_to_ipb() helper
  ppc/xive: Export xive_tctx_word2() helper
  ppc/pnv: Rename "id" to "quad-id" in PnvQuad
  docs/system: ppc: Update the URL for OpenPOWER firmware images
  ppc/pnv: Add an assert when calculating the RAM distribution on chips
  target/ppc: Convert debug to trace events (exceptions)
  target/ppc: Replace debug messages by asserts for unknown IRQ pins
  target/ppc: Convert debug to trace events (decrementer and IRQ)
  target/ppc: Fix 64-bit decrementer
  spapr/xive: Fix kvm_xive_source_reset trace event

Daniel Henrique Barboza (15):
  memory_hotplug.c: handle dev->id = NULL in acpi_memory_hotplug_write()
  spapr.c: handle dev->id in spapr_memory_unplug_rollback()
  spapr_drc.c: do not error_report() when drc->dev->id == NULL
  qapi/qdev.json: fix DEVICE_DELETED parameters doc
  qapi/qdev.json: add DEVICE_UNPLUG_GUEST_ERROR QAPI event
  spapr: use DEVICE_UNPLUG_GUEST_ERROR to report unplug errors
  memory_hotplug.c: send DEVICE_UNPLUG_GUEST_ERROR in 
acpi_memory_hotplug_write()
  spapr_numa.c: split FORM1 code into helpers
  spapr_numa.c: scrap 'legacy_numa' concept
  spapr_numa.c: parametrize FORM1 macros
  spapr_numa.c: rename numa_assoc_array to FORM1_assoc_array
  spapr: move FORM1 verifications to post CAS
  spapr_numa.c: FORM2 NUMA affinity support
  spapr_numa.c: handle auto NUMA node with no distance info
  spapr_numa.c: fixes in spapr_numa_FORM2_write_rtas_tables()

David Gibson (6):
  MAINTAINERS: Remove machine specific files from ppc TCG CPUs entry
  MAINTAINERS: Remove David & Greg as reviewers for a number of boards
  MAINTAINERS: Orphan obscure ppc platforms
  MAINTAINERS: Remove David & Greg as reviewers/co-maintainers of powernv
  MAINTAINERS: Add information for OpenPIC
  MAINTAINERS: Demote sPAPR from "Supported" to "Maintained"

Luis Pires (5):
  host-utils: Fix overflow detection in divu128()
  host-utils: fix missing zero-extension in divs128
  host-utils: introduce uabs64()
  i386/kvm: Replace abs64() with uabs64() from host-utils
  target/ppc: fix setting of CR flags in bcdcfsq

Matheus Ferst (2):
  target/ppc: add LPCR[HR] to DisasContext and hflags
  target/ppc: Check privilege level based on PSR and LPCR[HR] in tlbie[l]

 MAINTAINERS |  47 ++
 docs/about/deprecated.rst   |  10 ++
 docs/system/ppc/powernv.rst |   3 +-
 hw/acpi/memory_hotplug.c|  11 +-
 hw/i386/kvm/i8254.c |   7 +-
 hw/intc/openpic.c   |  92 +--
 hw/intc/spapr_xive_kvm.c|   4 +-
 hw/intc/xive.c  |  26 +--
 hw/ppc/pnv.c|  13 +-
 hw/ppc/pnv_core.c   |   4 +-
 hw/ppc/pnv_xscom.c  |   4 +
 hw/ppc/ppc.c| 211 +---
 hw/ppc/spapr.c  |  53 +++
 hw/ppc/spapr_cpu_core.c |   1 +
 hw/ppc/spapr_drc.c  |  16 +-
 hw/ppc/spapr_hcall.c|   7 +
 hw/ppc/spapr_numa.c |

[PULL 05/44] ppc/spapr: Add a POWER10 DD2 CPU

2021-09-29 Thread David Gibson
From: Cédric Le Goater 

Signed-off-by: Cédric Le Goater 
Message-Id: <20210901094153.227671-3-...@kaod.org>
Reviewed-by: Greg Kurz 
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_cpu_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 4f316a6f9d..58e7341cb7 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -382,6 +382,7 @@ static const TypeInfo spapr_cpu_core_type_infos[] = {
 DEFINE_SPAPR_CPU_CORE_TYPE("power9_v1.0"),
 DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.0"),
 DEFINE_SPAPR_CPU_CORE_TYPE("power10_v1.0"),
+DEFINE_SPAPR_CPU_CORE_TYPE("power10_v2.0"),
 #ifdef CONFIG_KVM
 DEFINE_SPAPR_CPU_CORE_TYPE("host"),
 #endif
-- 
2.31.1




[PULL 04/44] i386/kvm: Replace abs64() with uabs64() from host-utils

2021-09-29 Thread David Gibson
From: Luis Pires 

Drop abs64() and use uabs64() from host-utils, which avoids
an undefined behavior when taking abs of the most negative value.

Signed-off-by: Luis Pires 
Reviewed-by: Richard Henderson 
Reviewed-by: Eduardo Habkost 
Message-Id: <20210910112624.72748-5-luis.pi...@eldorado.org.br>
Signed-off-by: David Gibson 
---
 hw/i386/kvm/i8254.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/hw/i386/kvm/i8254.c b/hw/i386/kvm/i8254.c
index fa68669e8a..191a26fa57 100644
--- a/hw/i386/kvm/i8254.c
+++ b/hw/i386/kvm/i8254.c
@@ -59,11 +59,6 @@ struct KVMPITClass {
 DeviceRealize parent_realize;
 };
 
-static int64_t abs64(int64_t v)
-{
-return v < 0 ? -v : v;
-}
-
 static void kvm_pit_update_clock_offset(KVMPITState *s)
 {
 int64_t offset, clock_offset;
@@ -81,7 +76,7 @@ static void kvm_pit_update_clock_offset(KVMPITState *s)
 clock_gettime(CLOCK_MONOTONIC, &ts);
 offset -= ts.tv_nsec;
 offset -= (int64_t)ts.tv_sec * 1000000000;
-if (abs64(offset) < abs64(clock_offset)) {
+if (uabs64(offset) < uabs64(clock_offset)) {
 clock_offset = offset;
 }
 }
-- 
2.31.1




[PULL 01/44] host-utils: Fix overflow detection in divu128()

2021-09-29 Thread David Gibson
From: Luis Pires 

The previous code didn't detect overflows if the high 64 bits
of the dividend were equal to the 64-bit divisor. In that case,
64 bits wouldn't be enough to hold the quotient.

Signed-off-by: Luis Pires 
Reviewed-by: Richard Henderson 
Message-Id: <20210910112624.72748-2-luis.pi...@eldorado.org.br>
Signed-off-by: David Gibson 
---
 util/host-utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/host-utils.c b/util/host-utils.c
index 7b9322071d..a789a11b46 100644
--- a/util/host-utils.c
+++ b/util/host-utils.c
@@ -102,7 +102,7 @@ int divu128(uint64_t *plow, uint64_t *phigh, uint64_t 
divisor)
 *plow  = dlo / divisor;
 *phigh = dlo % divisor;
 return 0;
-} else if (dhi > divisor) {
+} else if (dhi >= divisor) {
 return 1;
 } else {
 
-- 
2.31.1




TCG Floating Point Support (Work in Progress)

2021-09-29 Thread Matt
Hello--

I'm excited to share that I have been developing support for TCG
floating point operations; specifically, to accelerate emulation of
x86 guest code which heavily exercises the x87 FPU for a game console
emulator project based on QEMU. So far, this work has shown great
promise, demonstrating some dramatic performance improvements in
emulation of x87 heavy code.

The feature works in concert with unaccelerated x87 FPU helpers, and
also allows total soft float helper fallback if the user discovers
some issue with the hard float implementation. For the TCG target,
I've opted to implement it for x86-64 hosts using SSE2, although this
could be extended to support full 80b double extended precision with
host x87 support. I'm also in early development of an implementation
for AArch64 hosts.

There are still some significant tasks to be done, like proper
handling of exception flags, edge cases, and testing, to name a few.
Once in a slightly more mature state, I do think this feature would
make a natural addition to upstream QEMU and plan to submit it for
consideration.

I'm writing to the mailing list now to inform FPU maintainers and any
other interested parties that this work is happening, to solicit any
early feedback, and to extend an invitation to anyone interested in
collaborating to expedite its upstreaming.

My initial TCG FP work can be found here:
https://github.com/mborgerson/xemu/pull/464/commits

Thanks,
Matt



Re: [PATCH v5] Prevent vhost-user-blk-test hang

2021-09-29 Thread Raphael Norwitz
On Tue, Sep 28, 2021 at 10:55:00AM +0200, Stefan Hajnoczi wrote:
> On Mon, Sep 27, 2021 at 05:17:01PM +0000, Raphael Norwitz wrote:
> > In the vhost-user-blk-test, as of now there is nothing stopping
> > vhost-user-blk in QEMU writing to the socket right after forking off the
> > storage daemon before it has a chance to come up properly, leaving the
> > test hanging forever. This intermittently hanging test has caused QEMU
> > automation failures reported multiple times on the mailing list [1].
> > 
> > This change makes the storage-daemon notify the vhost-user-blk-test
> > that it is fully initialized and ready to handle client connections by
> > creating a pidfile on initialization. This ensures that the storage-daemon
> > backend won't miss vhost-user messages and thereby resolves the hang.
> > 
> > [1] 
> > https://lore.kernel.org/qemu-devel/CAFEAcA8kYpz9LiPNxnWJAPSjc=nv532bedyfynabemeohqb...@mail.gmail.com/
> 

Hey Stefan,

> Hi Raphael,
> I would like to understand the issue that is being worked around in the
> patch.
> 
> QEMU should be okay with listen fd passing. The qemu-storage-daemon
> documentation even contains example code for this
> (docs/tools/qemu-storage-daemon.rst) and that may need to be updated if
> listen fd passing is fundamentally broken.
> 

The issue is that the "client" (in this case vhost-user-blk in QEMU) can
proceed to use the socket before the storage-daemon has a chance to
properly start up and monitor it. This is nothing unique to the
storage-daemon - I've seen races like this happen with different
vhost-user backends before.

Yes - I do think the docs can be improved to explicitly state that the
storage-daemon must be allowed to properly initialize before any data is
sent over the socket. Maybe we should even prescribe the use of the pidfile
option?

> Can you share more details about the problem?
> 

Did you see my analysis [1]?

[1] 
https://lore.kernel.org/qemu-devel/20210827165253.GA14291@raphael-debian-dev/

Basically QEMU sends VHOST_USER_GET_PROTOCOL_FEATURES across the vhost
socket and the storage daemon never receives it. Looking at the
QEMU state we see it is stuck waiting for a vhost-user response. Meanwhile
the storage-daemon never receives any message to begin with. AFAICT
there is nothing stopping QEMU from running first and sending a message
before vhost-user-blk comes up, and from testing we can see that waiting
for the storage-daemon to come up resolves the problem completely.

> Does "writing to the socket" mean writing vhost-user protocol messages
> or does it mean connect(2)?
> 

Yes - it means writing vhost-user messages. We see a message sent from
QEMU to the backend.

Note that in qtest_socket_server() (called from create_listen_socket())
we have already called listen() on the socket, so I would expect QEMU
calling connect(2) to succeed and proceed to successfully send messages
whether or not there is another listener. I even tried commenting out the
execlp for the storage-daemon and I saw the same behavior from QEMU - it
sends the message and hangs indefinitely.

> Could the problem be that vhost-user-blk-test.c creates the listen fds
> and does not close them? This means the host network stack doesn't
> consider the socket closed after QEMU terminates and therefore the test
> process hangs after QEMU is gone? In that case vhost-user-blk-test needs
> to close the fds after spawning qemu-storage-daemon.
> 

When the test hangs both QEMU and storage-daemon are still up and
connected to the socket and waiting for messages from each other. I don't
see how we would close the FD in this state or how it would help.

We may want to think about implementing some kind of timeout for initial
vhost-user messages so that we fail instead of hang in cases like these,
as I proposed in [1]. What do you think?

> Stefan
> 
> > 
> > Signed-off-by: Raphael Norwitz 
> > Reviewed-by: Eric Blake 
> > ---
> >  tests/qtest/vhost-user-blk-test.c | 26 +-
> >  1 file changed, 25 insertions(+), 1 deletion(-)
> > 
> > diff --git a/tests/qtest/vhost-user-blk-test.c 
> > b/tests/qtest/vhost-user-blk-test.c
> > index 6f108a1b62..5fed262da1 100644
> > --- a/tests/qtest/vhost-user-blk-test.c
> > +++ b/tests/qtest/vhost-user-blk-test.c
> > @@ -24,6 +24,7 @@
> >  #define TEST_IMAGE_SIZE (64 * 1024 * 1024)
> >  #define QVIRTIO_BLK_TIMEOUT_US  (30 * 1000 * 1000)
> >  #define PCI_SLOT_HP 0x06
> > +#define PIDFILE_RETRIES 5
> >  
> >  typedef struct {
> >  pid_t pid;
> > @@ -885,7 +886,8 @@ static void start_vhost_user_blk(GString *cmd_line, int 
> > vus_instances,
> >   int num_queues)
> >  {
> >  const char *vhost_user_blk_bin = qtest_qemu_storage_daemon_binary();
> > -int i;
> > +int i, retries;
> > +char *daemon_pidfile_path;
> >  gchar *img_path;
> >  GString *storage_daemon_command = g_string_new(NULL);
> >  QemuStorageDaemonState *qsd;
> > @@ -898,6 +900,8 @@ s

[Bug 1945540] Re: Java crashes on s390x VM with SIGILL/ILL_PRVOPC at '__kernel_getcpu+0x8'

2021-09-29 Thread John Neffenger
I just tried the same s390x virtual machine under QEMU version 6.0.0 in
the Ubuntu 21.10 Beta release, and the error still occurs. My system
information is shown below:

$ qemu-system-s390x --version
QEMU emulator version 6.0.0 (Debian 1:6.0+dfsg-2expubuntu1)
Copyright (c) 2003-2021 Fabrice Bellard and the QEMU Project developers

$ cat /etc/lsb-release
DISTRIB_ID=Ubuntu
DISTRIB_RELEASE=21.10
DISTRIB_CODENAME=impish
DISTRIB_DESCRIPTION="Ubuntu Impish Indri (development branch)"

$ uname -a
Linux ubuntu 5.13.0-16-generic #16-Ubuntu SMP Fri Sep 3 14:53:27 UTC 2021 
x86_64 x86_64 x86_64 GNU/Linux

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1945540

Title:
  Java crashes on s390x VM with SIGILL/ILL_PRVOPC at
  '__kernel_getcpu+0x8'

Status in QEMU:
  New

Bug description:
  Host environment

  - Operating system: Ubuntu 20.04.3 LTS Desktop
  - OS/kernel version: Linux tower 5.11.0-37-generic #41~20.04.2-Ubuntu
  SMP Fri Sep 24 09:06:38 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
  - Architecture: amd64
  - QEMU flavor: qemu-system-s390x
  - QEMU version: QEMU emulator version 4.2.1 (Debian 1:4.2-3ubuntu6.17)
  - QEMU command line: See attached file 'command-line.txt'

  Emulated/Virtualized environment

  - Operating system: Ubuntu 20.04.3 LTS Server
  - OS/kernel version: Linux s390x-focal 5.4.0-88-generic #99-Ubuntu
  SMP Thu Sep 23 17:27:44 UTC 2021 s390x s390x s390x GNU/Linux
  - Architecture: s390x

  Description of problem

  Java crashes as shown below:

  $ java --version
  #
  # A fatal error has been detected by the Java Runtime Environment:
  #
  #  SIGILL (0x4) at pc=0x03ff9f5fe6f4, pid=6789, tid=6818
  #
  # JRE version:  (17.0+35) (build )
  # Java VM: OpenJDK 64-Bit Server VM (17+35-snap, mixed mode, sharing,
  # tiered, compressed oops, compressed class ptrs, g1 gc, linux-s390x)
  # Problematic frame:
  # C  [linux-vdso64.so.1+0x6f8]  __kernel_getcpu+0x8
  #
  # Core dump will be written. Default location: core.6789 (may not
  # exist)
  #
  # An error report file with more information is saved as:
  # /home/ubuntu/src/hs_err_pid6789.log
  #
  #
  Aborted (core dumped)

  Steps to reproduce

  Run any Java program to reproduce the problem.

  Because the 'openjdk' packages in Ubuntu run the 'java' command during
  installation, they hit the same error and fail to install. As an
  alternative, you can install the OpenJDK Snap package for the 's390x'
  architecture as follows:

$ sudo snap install openjdk

  The OpenJDK Snap package has been tested to work on a real IBM/S390
  8561 system, namely the IBM LinuxONE III LT1 at Marist College:

Marist College Installs World’s First IBM LinuxONE III™
https://www.marist.edu/-/marist-first-linuxone-iii

  Additional information

  See the following attached files:

  command-line.txt - the command-line used to start the virtual machine
  hs_err_pid6789.log - the log file resulting from 'java --version'

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1945540/+subscriptions




[PATCH v9 08/10] vhost: add support for configure interrupt

2021-09-29 Thread Cindy Lu
Add support for the configure interrupt in vhost.
The interrupt is started in vhost_dev_start
and stopped in vhost_dev_stop.

Signed-off-by: Cindy Lu 
---
 hw/virtio/vhost.c | 76 +++
 include/hw/virtio/vhost.h |  4 +++
 2 files changed, 80 insertions(+)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index e8f85a5d2d..3b04027424 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1534,6 +1534,67 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, 
VirtIODevice *vdev, int n,
 }
 }
 
+bool vhost_config_pending(struct vhost_dev *hdev)
+{
+assert(hdev->vhost_ops);
+if ((hdev->started == false) ||
+(hdev->vhost_ops->vhost_set_config_call == NULL)) {
+return false;
+}
+
+EventNotifier *notifier =
+&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier;
+return event_notifier_test_and_clear(notifier);
+}
+
+void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask)
+{
+int fd;
+int r;
+EventNotifier *notifier =
+&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier;
+EventNotifier *config_notifier = &vdev->config_notifier;
+assert(hdev->vhost_ops);
+
+if ((hdev->started == false) ||
+(hdev->vhost_ops->vhost_set_config_call == NULL)) {
+return;
+}
+if (mask) {
+assert(vdev->use_guest_notifier_mask);
+fd = event_notifier_get_fd(notifier);
+} else {
+fd = event_notifier_get_fd(config_notifier);
+}
+r = hdev->vhost_ops->vhost_set_config_call(hdev, fd);
+if (r < 0) {
+VHOST_OPS_DEBUG("vhost_set_config_call failed");
+}
+}
+
+static void vhost_stop_config_intr(struct vhost_dev *dev)
+{
+int fd = -1;
+assert(dev->vhost_ops);
+if (dev->vhost_ops->vhost_set_config_call) {
+dev->vhost_ops->vhost_set_config_call(dev, fd);
+}
+}
+
+static void vhost_start_config_intr(struct vhost_dev *dev)
+{
+int r;
+
+assert(dev->vhost_ops);
+int fd = event_notifier_get_fd(&dev->vdev->config_notifier);
+if (dev->vhost_ops->vhost_set_config_call) {
+r = dev->vhost_ops->vhost_set_config_call(dev, fd);
+if (!r) {
+event_notifier_set(&dev->vdev->config_notifier);
+}
+}
+}
+
 uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
 uint64_t features)
 {
@@ -1752,6 +1813,16 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice 
*vdev)
 }
 }
 
+r = event_notifier_init(
+&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0);
+if (r < 0) {
+return r;
+}
+event_notifier_test_and_clear(
+&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
+if (!vdev->use_guest_notifier_mask) {
+vhost_config_mask(hdev, vdev, true);
+}
 if (hdev->log_enabled) {
 uint64_t log_base;
 
@@ -1785,6 +1856,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice 
*vdev)
 vhost_device_iotlb_miss(hdev, vq->used_phys, true);
 }
 }
+vhost_start_config_intr(hdev);
 return 0;
 fail_log:
 vhost_log_put(hdev, false);
@@ -1810,6 +1882,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice 
*vdev)
 
 /* should only be called after backend is connected */
 assert(hdev->vhost_ops);
+event_notifier_test_and_clear(
+&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
+event_notifier_test_and_clear(&vdev->config_notifier);
 
 if (hdev->vhost_ops->vhost_dev_start) {
 hdev->vhost_ops->vhost_dev_start(hdev, false);
@@ -1827,6 +1902,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice 
*vdev)
 }
 memory_listener_unregister(&hdev->iommu_listener);
 }
+vhost_stop_config_intr(hdev);
 vhost_log_put(hdev, true);
 hdev->started = false;
 hdev->vdev = NULL;
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 045d0fd9f2..e938cc3b4b 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -29,6 +29,7 @@ struct vhost_virtqueue {
 unsigned long long used_phys;
 unsigned used_size;
 EventNotifier masked_notifier;
+EventNotifier masked_config_notifier;
 struct vhost_dev *dev;
 };
 
@@ -37,6 +38,7 @@ typedef unsigned long vhost_log_chunk_t;
 #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t))
 #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS)
 #define VHOST_INVALID_FEATURE_BIT   (0xff)
+#define VHOST_QUEUE_NUM_CONFIG_INR  0
 
 struct vhost_log {
 unsigned long long size;
@@ -110,6 +112,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice 
*vdev);
 void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
 int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
 void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
+bool vhost_config_pending(struct vhost_dev *hdev);
+void v

[PATCH v9 06/10] virtio: add support for configure interrupt

2021-09-29 Thread Cindy Lu
Add support for the configure interrupt in virtio:
add the notifier_read and set_fd_handler functions.

Signed-off-by: Cindy Lu 
---
 hw/virtio/virtio.c | 29 +
 include/hw/virtio/virtio.h |  4 
 2 files changed, 33 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 31987b103b..bd222edc9e 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -3531,7 +3531,14 @@ static void 
virtio_queue_guest_notifier_read(EventNotifier *n)
 virtio_irq(vq);
 }
 }
+static void virtio_config_guest_notifier_read(EventNotifier *n)
+{
+VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);
 
+if (event_notifier_test_and_clear(n)) {
+virtio_notify_config(vdev);
+}
+}
 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
 bool with_irqfd)
 {
@@ -3548,6 +3555,23 @@ void 
virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
 }
 }
 
+void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
+ bool assign, bool with_irqfd)
+{
+EventNotifier *n;
+n = &vdev->config_notifier;
+if (assign && !with_irqfd) {
+event_notifier_set_handler(n, virtio_config_guest_notifier_read);
+} else {
+event_notifier_set_handler(n, NULL);
+}
+if (!assign) {
+/* Test and clear notifier before closing it,*/
+/* in case poll callback didn't have time to run. */
+virtio_config_guest_notifier_read(n);
+}
+}
+
 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
 {
 return &vq->guest_notifier;
@@ -3621,6 +3645,11 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue 
*vq)
 return &vq->host_notifier;
 }
 
+EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
+{
+return &vdev->config_notifier;
+}
+
 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
 {
 vq->host_notifier_enabled = enabled;
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 2766c293f4..9e02d155a1 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -110,6 +110,7 @@ struct VirtIODevice
 bool use_guest_notifier_mask;
 AddressSpace *dma_as;
 QLIST_HEAD(, VirtQueue) *vector_queues;
+EventNotifier config_notifier;
 };
 
 struct VirtioDeviceClass {
@@ -312,11 +313,14 @@ uint16_t virtio_get_queue_index(VirtQueue *vq);
 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
 bool with_irqfd);
+void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
+ bool assign, bool with_irqfd);
 int virtio_device_start_ioeventfd(VirtIODevice *vdev);
 int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
 void virtio_device_release_ioeventfd(VirtIODevice *vdev);
 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
+EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev);
 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled);
 void virtio_queue_host_notifier_read(EventNotifier *n);
 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
-- 
2.21.3




[PATCH v9 05/10] vhost-vdpa: add support for config interrupt call back

2021-09-29 Thread Cindy Lu
Add a new callback function in vhost-vdpa. This callback function will
pass the fd number to the hardware.

Signed-off-by: Cindy Lu 
---
 hw/virtio/trace-events | 2 ++
 hw/virtio/vhost-vdpa.c | 7 +++
 2 files changed, 9 insertions(+)

diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 8ed19e9d0c..836e73d1f7 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -52,6 +52,8 @@ vhost_vdpa_set_vring_call(void *dev, unsigned int index, int 
fd) "dev: %p index:
 vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 
0x%"PRIx64
 vhost_vdpa_set_owner(void *dev) "dev: %p"
 vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t 
avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 
0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64
+vhost_vdpa_set_config_call(void *dev, int fd)"dev: %p fd: %d"
+
 
 # virtio.c
 virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned 
out_num) "elem %p size %zd in_num %u out_num %u"
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 4fa414feea..73764afc61 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -622,6 +622,12 @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
 trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
 return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
 }
+static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
+   int fd)
+{
+trace_vhost_vdpa_set_config_call(dev, fd);
+return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
+}
 
 static int vhost_vdpa_get_features(struct vhost_dev *dev,
  uint64_t *features)
@@ -688,4 +694,5 @@ const VhostOps vdpa_ops = {
 .vhost_get_device_id = vhost_vdpa_get_device_id,
 .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
 .vhost_force_iommu = vhost_vdpa_force_iommu,
+.vhost_set_config_call = vhost_vdpa_set_config_call,
 };
-- 
2.21.3




[PATCH v9 10/10] virtio-pci: add support for configure interrupt

2021-09-29 Thread Cindy Lu
Add support for the configure interrupt. The process uses kvm_irqfd_assign
to set the GSI in the kernel. When the configure notifier is signaled by the
host, QEMU will inject an MSI-X interrupt into the guest.

Signed-off-by: Cindy Lu 
---
 hw/virtio/virtio-pci.c | 88 +-
 hw/virtio/virtio-pci.h |  4 +-
 2 files changed, 72 insertions(+), 20 deletions(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index d0a2c2fb81..50179c2ba1 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -728,7 +728,8 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, 
int queue_no,
 VirtQueue *vq;
 
 if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
-return -1;
+*n = virtio_config_get_guest_notifier(vdev);
+*vector = vdev->config_vector;
 } else {
 if (!virtio_queue_get_num(vdev, queue_no)) {
 return -1;
@@ -806,6 +807,10 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy 
*proxy, int nvqs)
 return ret;
 }
 
+static int kvm_virtio_pci_vector_config_use(VirtIOPCIProxy *proxy)
+{
+return kvm_virtio_pci_vector_use_one(proxy, VIRTIO_CONFIG_IRQ_IDX);
+}
 
 static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy,
   int queue_no)
@@ -829,6 +834,7 @@ static void 
kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy,
 }
 kvm_virtio_pci_vq_vector_release(proxy, vector);
 }
+
 static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
 {
 int queue_no;
@@ -842,6 +848,11 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy 
*proxy, int nvqs)
 }
 }
 
+static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy)
+{
+kvm_virtio_pci_vector_release_one(proxy, VIRTIO_CONFIG_IRQ_IDX);
+}
+
 static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector,
@@ -923,9 +934,17 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, 
unsigned vector,
 }
 vq = virtio_vector_next_queue(vq);
 }
-
+/* unmask config intr */
+n = virtio_config_get_guest_notifier(vdev);
+ret = virtio_pci_one_vector_unmask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector,
+   msg, n);
+if (ret < 0) {
+goto undo_config;
+}
 return 0;
-
+undo_config:
+n = virtio_config_get_guest_notifier(vdev);
+virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n);
 undo:
 vq = virtio_vector_first_queue(vdev, vector);
 while (vq && unmasked >= 0) {
@@ -959,6 +978,8 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned 
vector)
 }
 vq = virtio_vector_next_queue(vq);
 }
+n = virtio_config_get_guest_notifier(vdev);
+virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n);
 }
 
 static void virtio_pci_vector_poll(PCIDevice *dev,
@@ -971,19 +992,17 @@ static void virtio_pci_vector_poll(PCIDevice *dev,
 int queue_no;
 unsigned int vector;
 EventNotifier *notifier;
-VirtQueue *vq;
-
-for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
-if (!virtio_queue_get_num(vdev, queue_no)) {
+int ret;
+for (queue_no = VIRTIO_CONFIG_IRQ_IDX;
+ queue_no < proxy->nvqs_with_notifiers; queue_no++) {
+ret = virtio_pci_get_notifier(proxy, queue_no, &notifier, &vector);
+if (ret < 0) {
 break;
 }
-vector = virtio_queue_vector(vdev, queue_no);
 if (vector < vector_start || vector >= vector_end ||
 !msix_is_masked(dev, vector)) {
 continue;
 }
-vq = virtio_get_queue(vdev, queue_no);
-notifier = virtio_queue_get_guest_notifier(vq);
 if (k->guest_notifier_pending) {
 if (k->guest_notifier_pending(vdev, queue_no)) {
 msix_set_pending(dev, vector);
@@ -994,23 +1013,42 @@ static void virtio_pci_vector_poll(PCIDevice *dev,
 }
 }
 
+void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue 
*vq,
+  int n, bool assign,
+  bool with_irqfd)
+{
+if (n == VIRTIO_CONFIG_IRQ_IDX) {
+virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd);
+} else {
+virtio_queue_set_guest_notifier_fd_handler(vq, assign, with_irqfd);
+}
+}
+
 static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
  bool with_irqfd)
 {
 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
-VirtQueue *vq = virtio_get_queue(vdev, n);
-EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
+VirtQueue *vq = NULL;
+EventNotifier *no

[PATCH v9 04/10] vhost: add new call back function for config interrupt

2021-09-29 Thread Cindy Lu
To support the config interrupt, we need to
add a new call back function for config interrupt.

Signed-off-by: Cindy Lu 
---
 include/hw/virtio/vhost-backend.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/hw/virtio/vhost-backend.h 
b/include/hw/virtio/vhost-backend.h
index 8475c5a29d..e732d2e702 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -126,6 +126,8 @@ typedef int (*vhost_get_device_id_op)(struct vhost_dev 
*dev, uint32_t *dev_id);
 
 typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
 
+typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
+   int fd);
 typedef struct VhostOps {
 VhostBackendType backend_type;
 vhost_backend_init vhost_backend_init;
@@ -171,6 +173,7 @@ typedef struct VhostOps {
 vhost_vq_get_addr_op  vhost_vq_get_addr;
 vhost_get_device_id_op vhost_get_device_id;
 vhost_force_iommu_op vhost_force_iommu;
+vhost_set_config_call_op vhost_set_config_call;
 } VhostOps;
 
 extern const VhostOps user_ops;
-- 
2.21.3




[PATCH v9 09/10] virtio-mmio: add support for configure interrupt

2021-09-29 Thread Cindy Lu
Add configure interrupt support for the virtio-mmio bus. This
interrupt works when the backend is vhost-vdpa.

Signed-off-by: Cindy Lu 
---
 hw/virtio/virtio-mmio.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 1af48a1b04..695fd31f9d 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -673,7 +673,30 @@ static int virtio_mmio_set_guest_notifier(DeviceState *d, 
int n, bool assign,
 
 return 0;
 }
+static int virtio_mmio_set_config_guest_notifier(DeviceState *d, bool assign)
+{
+VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+bool with_irqfd = false;
+EventNotifier *notifier = virtio_config_get_guest_notifier(vdev);
+int r = 0;
 
+if (assign) {
+r = event_notifier_init(notifier, 0);
+if (r < 0) {
+return r;
+}
+virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd);
+} else {
+virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd);
+event_notifier_cleanup(notifier);
+}
+if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) {
+vdc->guest_notifier_mask(vdev, VIRTIO_CONFIG_IRQ_IDX, !assign);
+}
+return r;
+}
 static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs,
bool assign)
 {
@@ -695,6 +718,10 @@ static int virtio_mmio_set_guest_notifiers(DeviceState *d, 
int nvqs,
 goto assign_error;
 }
 }
+r = virtio_mmio_set_config_guest_notifier(d, assign);
+if (r < 0) {
+goto assign_error;
+}
 
 return 0;
 
-- 
2.21.3




[PATCH v9 03/10] virtio-pci: decouple the single vector from the interrupt process

2021-09-29 Thread Cindy Lu
To reuse the interrupt process for the configure interrupt,
decouple the single vector from the interrupt process. Add new functions
kvm_virtio_pci_vector_use_one and _release_one. These functions handle
a single vector; the whole process is completed in a loop over the vq number.

Signed-off-by: Cindy Lu 
---
 hw/virtio/virtio-pci.c | 130 +++--
 1 file changed, 72 insertions(+), 58 deletions(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 456782c43e..d0a2c2fb81 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -677,7 +677,6 @@ static uint32_t virtio_read_config(PCIDevice *pci_dev,
 }
 
 static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
-unsigned int queue_no,
 unsigned int vector)
 {
 VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
@@ -744,87 +743,102 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy 
*proxy, int queue_no,
 return 0;
 }
 
-static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no)
 {
+unsigned int vector;
+int ret;
+EventNotifier *n;
 PCIDevice *dev = &proxy->pci_dev;
 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
-unsigned int vector;
-int ret, queue_no;
-EventNotifier *n;
-for (queue_no = 0; queue_no < nvqs; queue_no++) {
-if (!virtio_queue_get_num(vdev, queue_no)) {
-break;
-}
-ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
-if (ret < 0) {
-break;
-}
-if (vector >= msix_nr_vectors_allocated(dev)) {
-continue;
-}
-ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector);
+
+ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+if (ret < 0) {
+return ret;
+}
+if (vector >= msix_nr_vectors_allocated(dev)) {
+return -1;
+}
+ret = kvm_virtio_pci_vq_vector_use(proxy, vector);
+if (ret < 0) {
+goto undo;
+}
+/*
+ * If guest supports masking, set up irqfd now.
+ * Otherwise, delay until unmasked in the frontend.
+ */
+if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
 if (ret < 0) {
+kvm_virtio_pci_vq_vector_release(proxy, vector);
 goto undo;
 }
-/* If guest supports masking, set up irqfd now.
- * Otherwise, delay until unmasked in the frontend.
- */
-if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
-ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
-if (ret < 0) {
-kvm_virtio_pci_vq_vector_release(proxy, vector);
-goto undo;
-}
-}
 }
-return 0;
 
+return 0;
 undo:
-while (--queue_no >= 0) {
-vector = virtio_queue_vector(vdev, queue_no);
-if (vector >= msix_nr_vectors_allocated(dev)) {
-continue;
+
+vector = virtio_queue_vector(vdev, queue_no);
+if (vector >= msix_nr_vectors_allocated(dev)) {
+return ret;
+}
+if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+if (ret < 0) {
+return ret;
 }
-if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
-ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
-if (ret < 0) {
-break;
-}
-kvm_virtio_pci_irqfd_release(proxy, n, vector);
+kvm_virtio_pci_irqfd_release(proxy, n, vector);
+}
+return ret;
+}
+static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+{
+int queue_no;
+int ret = 0;
+VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+for (queue_no = 0; queue_no < nvqs; queue_no++) {
+if (!virtio_queue_get_num(vdev, queue_no)) {
+return -1;
 }
-kvm_virtio_pci_vq_vector_release(proxy, vector);
+ret = kvm_virtio_pci_vector_use_one(proxy, queue_no);
 }
 return ret;
 }
 
-static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
+
+static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy,
+  int queue_no)
 {
-PCIDevice *dev = &proxy->pci_dev;
 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 unsigned int vector;
-int queue_no;
-VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 EventNotifier *n;
-int ret ;
+int ret;
+VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+PCIDevice *dev = &proxy->pci_dev;
+
+ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vecto

[PATCH v9 07/10] virtio-net: add support for configure interrupt

2021-09-29 Thread Cindy Lu
Add support for the configure interrupt in virtio_net.
The functions are config_pending and config_mask.

Signed-off-by: Cindy Lu 
---
 hw/net/vhost_net.c  | 10 ++
 hw/net/virtio-net.c |  6 ++
 include/net/vhost_net.h |  3 +++
 3 files changed, 19 insertions(+)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 10a7780a13..1e78ef8349 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -433,6 +433,16 @@ void vhost_net_virtqueue_mask(VHostNetState *net, 
VirtIODevice *dev,
 vhost_virtqueue_mask(&net->dev, dev, idx, mask);
 }
 
+bool vhost_net_config_pending(VHostNetState *net)
+{
+return vhost_config_pending(&net->dev);
+}
+
+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev,
+  bool mask)
+{
+vhost_config_mask(&net->dev, dev, mask);
+}
 VHostNetState *get_vhost_net(NetClientState *nc)
 {
 VHostNetState *vhost_net = 0;
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 65b7cabcaf..005818a45a 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -3155,6 +3155,9 @@ static bool 
virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
 if (idx != VIRTIO_CONFIG_IRQ_IDX) {
 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
 }
+if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+return vhost_net_config_pending(get_vhost_net(nc->peer));
+   }
 return false;
 }
 
@@ -3167,6 +3170,9 @@ static void virtio_net_guest_notifier_mask(VirtIODevice 
*vdev, int idx,
 if (idx != VIRTIO_CONFIG_IRQ_IDX) {
 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
 }
+if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
+ }
 }
 
 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 172b0051d8..478c127582 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -36,6 +36,9 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t 
*data,
 bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
   int idx, bool mask);
+bool vhost_net_config_pending(VHostNetState *net);
+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev,
+  bool mask);
 int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr);
 VHostNetState *get_vhost_net(NetClientState *nc);
 
-- 
2.21.3




[PATCH v9 02/10] virtio-pci: decouple notifier from interrupt process

2021-09-29 Thread Cindy Lu
To reuse the notifier process for the configure interrupt, use the
virtio_pci_get_notifier function to get the notifier.
The input of this function is the queue index (IDX); the outputs are the
notifier and the vector.

Signed-off-by: Cindy Lu 
---
 hw/virtio/virtio-pci.c | 84 +-
 1 file changed, 58 insertions(+), 26 deletions(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 433060ac02..456782c43e 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -704,29 +704,45 @@ static void 
kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
 }
 
 static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
- unsigned int queue_no,
+ EventNotifier *n,
  unsigned int vector)
 {
 VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
-VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
-VirtQueue *vq = virtio_get_queue(vdev, queue_no);
-EventNotifier *n = virtio_queue_get_guest_notifier(vq);
 return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq);
 }
 
 static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
-  unsigned int queue_no,
+  EventNotifier *n ,
   unsigned int vector)
 {
-VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
-VirtQueue *vq = virtio_get_queue(vdev, queue_no);
-EventNotifier *n = virtio_queue_get_guest_notifier(vq);
 VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
 int ret;
 
 ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq);
 assert(ret == 0);
 }
+static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no,
+  EventNotifier **n, unsigned int *vector)
+{
+PCIDevice *dev = &proxy->pci_dev;
+VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+VirtQueue *vq;
+
+if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
+return -1;
+} else {
+if (!virtio_queue_get_num(vdev, queue_no)) {
+return -1;
+}
+*vector = virtio_queue_vector(vdev, queue_no);
+vq = virtio_get_queue(vdev, queue_no);
+*n = virtio_queue_get_guest_notifier(vq);
+}
+if (*vector >= msix_nr_vectors_allocated(dev)) {
+return -1;
+}
+return 0;
+}
 
 static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
 {
@@ -735,12 +751,15 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy 
*proxy, int nvqs)
 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 unsigned int vector;
 int ret, queue_no;
-
+EventNotifier *n;
 for (queue_no = 0; queue_no < nvqs; queue_no++) {
 if (!virtio_queue_get_num(vdev, queue_no)) {
 break;
 }
-vector = virtio_queue_vector(vdev, queue_no);
+ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+if (ret < 0) {
+break;
+}
 if (vector >= msix_nr_vectors_allocated(dev)) {
 continue;
 }
@@ -752,7 +771,7 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, 
int nvqs)
  * Otherwise, delay until unmasked in the frontend.
  */
 if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
-ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
+ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
 if (ret < 0) {
 kvm_virtio_pci_vq_vector_release(proxy, vector);
 goto undo;
@@ -768,7 +787,11 @@ undo:
 continue;
 }
 if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
-kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
+ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+if (ret < 0) {
+break;
+}
+kvm_virtio_pci_irqfd_release(proxy, n, vector);
 }
 kvm_virtio_pci_vq_vector_release(proxy, vector);
 }
@@ -782,12 +805,16 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy 
*proxy, int nvqs)
 unsigned int vector;
 int queue_no;
 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
-
+EventNotifier *n;
+int ret ;
 for (queue_no = 0; queue_no < nvqs; queue_no++) {
 if (!virtio_queue_get_num(vdev, queue_no)) {
 break;
 }
-vector = virtio_queue_vector(vdev, queue_no);
+ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+if (ret < 0) {
+break;
+}
 if (vector >= msix_nr_vectors_allocated(dev)) {
 continue;
 }
@@ -795,21 +822,20 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy 
*proxy, int nvqs)
  * Otherwise, it was cleaned when masked in the frontend.
  */
 if (vdev->use_guest

[PATCH v9 01/10] virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX

2021-09-29 Thread Cindy Lu
To support the configure interrupt for vhost-vdpa, introduce
VIRTIO_CONFIG_IRQ_IDX (-1) as the config queue index, so that we can reuse
the functions guest_notifier_mask and guest_notifier_pending.
Add a check of the queue index: if the driver does not support the
configure interrupt, the function will just return.

Signed-off-by: Cindy Lu 
---
 hw/display/vhost-user-gpu.c|  6 ++
 hw/net/virtio-net.c| 10 +++---
 hw/virtio/vhost-user-fs.c  |  9 +++--
 hw/virtio/vhost-vsock-common.c |  6 ++
 hw/virtio/virtio-crypto.c  |  6 ++
 include/hw/virtio/virtio.h |  2 ++
 6 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c
index 49df56cd14..73ad3d84c9 100644
--- a/hw/display/vhost-user-gpu.c
+++ b/hw/display/vhost-user-gpu.c
@@ -485,6 +485,9 @@ vhost_user_gpu_guest_notifier_pending(VirtIODevice *vdev, 
int idx)
 {
 VhostUserGPU *g = VHOST_USER_GPU(vdev);
 
+if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+return false;
+}
 return vhost_virtqueue_pending(&g->vhost->dev, idx);
 }
 
@@ -493,6 +496,9 @@ vhost_user_gpu_guest_notifier_mask(VirtIODevice *vdev, int 
idx, bool mask)
 {
 VhostUserGPU *g = VHOST_USER_GPU(vdev);
 
+if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+return;
+}
 vhost_virtqueue_mask(&g->vhost->dev, vdev, idx, mask);
 }
 
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 16d20cdee5..65b7cabcaf 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -3152,7 +3152,10 @@ static bool 
virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
 VirtIONet *n = VIRTIO_NET(vdev);
 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
 assert(n->vhost_started);
-return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
+if (idx != VIRTIO_CONFIG_IRQ_IDX) {
+return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
+}
+return false;
 }
 
 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
@@ -3161,8 +3164,9 @@ static void virtio_net_guest_notifier_mask(VirtIODevice 
*vdev, int idx,
 VirtIONet *n = VIRTIO_NET(vdev);
 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
 assert(n->vhost_started);
-vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
- vdev, idx, mask);
+if (idx != VIRTIO_CONFIG_IRQ_IDX) {
+vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
+}
 }
 
 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index c595957983..309c8efabf 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -156,11 +156,13 @@ static void vuf_handle_output(VirtIODevice *vdev, 
VirtQueue *vq)
  */
 }
 
-static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx,
-bool mask)
+static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask)
 {
 VHostUserFS *fs = VHOST_USER_FS(vdev);
 
+if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+return;
+}
 vhost_virtqueue_mask(&fs->vhost_dev, vdev, idx, mask);
 }
 
@@ -168,6 +170,9 @@ static bool vuf_guest_notifier_pending(VirtIODevice *vdev, 
int idx)
 {
 VHostUserFS *fs = VHOST_USER_FS(vdev);
 
+if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+return false;
+}
 return vhost_virtqueue_pending(&fs->vhost_dev, idx);
 }
 
diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c
index 4ad6e234ad..2112b44802 100644
--- a/hw/virtio/vhost-vsock-common.c
+++ b/hw/virtio/vhost-vsock-common.c
@@ -101,6 +101,9 @@ static void 
vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx,
 {
 VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
 
+if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+return;
+}
 vhost_virtqueue_mask(&vvc->vhost_dev, vdev, idx, mask);
 }
 
@@ -109,6 +112,9 @@ static bool 
vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev,
 {
 VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
 
+if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+return false;
+}
 return vhost_virtqueue_pending(&vvc->vhost_dev, idx);
 }
 
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index 54f9bbb789..1d5192f8b4 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -948,6 +948,9 @@ static void virtio_crypto_guest_notifier_mask(VirtIODevice 
*vdev, int idx,
 
 assert(vcrypto->vhost_started);
 
+if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+return;
+}
 cryptodev_vhost_virtqueue_mask(vdev, queue, idx, mask);
 }
 
@@ -958,6 +961,9 @@ static bool 
virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx)
 
 assert(vcrypto->vhost_started);
 
+if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+return false;
+}
 return cryptodev_vhost_virtqueue_pending(vdev, queue, idx);
 }
 
diff --git a/include/hw

[PATCH v9 00/10] vhost-vdpa: add support for configure interrupt

2021-09-29 Thread Cindy Lu
These patches add support for the configure interrupt.

The code has been tested with vp-vdpa (supports the configure interrupt),
vdpa_sim (does not support the configure interrupt), and a virtio tap device.

Tested on the virtio-pci bus and the virtio-mmio bus.

Change in v2:
Add support for virtio-mmio bus
active the notifier while the backend support configure interrupt
misc fixes from v1

Change in v3
fix the coding style problems

Change in v4
misc fixes from v3
merge the set_config_notifier to set_guest_notifier
when vdpa start, check the feature by VIRTIO_NET_F_STATUS

Change in v5
misc fixes from v4
split the code to introduce configure interrupt type and the callback function
will init the configure interrupt in all virtio-pci and virtio-mmio bus, but 
will
only active while using vhost-vdpa driver

Change in v6
misc fixes from v5
decouple virtqueue from interrupt setting and misc process
fix the bug in virtio_net_handle_rx
use -1 as the queue number to identify if the interrupt is configure interrupt

Change in v7
misc fixes from v6
decouple virtqueue from interrupt setting and misc process
decouple virtqueue from vector use/release process
decouple virtqueue from set notifier fd handler process
move config_notifier and masked_config_notifier to VirtIODevice
fix the bug in virtio_net_handle_rx, add more information
add VIRTIO_CONFIG_IRQ_IDX as the queue number to identify if the interrupt is 
configure interrupt

Change in v8
misc fixes from v7
decouple virtqueue from interrupt setting and misc process
decouple virtqueue from vector use/release process
decouple virtqueue from set notifier fd handler process
move the vhost configure interrupt to vhost_net

Change in v9
misc fixes from v8
address the comments for v8

Cindy Lu (10):
  virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX
  virtio-pci: decouple notifier from interrupt process
  virtio-pci: decouple the single vector from the interrupt process
  vhost: add new call back function for config interrupt
  vhost-vdpa: add support for config interrupt call back
  virtio: add support for configure interrupt
  virtio-net: add support for configure interrupt
  vhost: add support for configure interrupt
  virtio-mmio: add support for configure interrupt
  virtio-pci: add support for configure interrupt

 hw/display/vhost-user-gpu.c   |   6 +
 hw/net/vhost_net.c|  10 ++
 hw/net/virtio-net.c   |  16 +-
 hw/virtio/trace-events|   2 +
 hw/virtio/vhost-user-fs.c |   9 +-
 hw/virtio/vhost-vdpa.c|   7 +
 hw/virtio/vhost-vsock-common.c|   6 +
 hw/virtio/vhost.c |  76 +
 hw/virtio/virtio-crypto.c |   6 +
 hw/virtio/virtio-mmio.c   |  27 
 hw/virtio/virtio-pci.c| 260 --
 hw/virtio/virtio-pci.h|   4 +-
 hw/virtio/virtio.c|  29 
 include/hw/virtio/vhost-backend.h |   3 +
 include/hw/virtio/vhost.h |   4 +
 include/hw/virtio/virtio.h|   6 +
 include/net/vhost_net.h   |   3 +
 17 files changed, 386 insertions(+), 88 deletions(-)

-- 
2.21.3




Rust in Qemu BoF followup 2: Rust toolchain availability

2021-09-29 Thread David Gibson
Hi again all,

I've now done.. or at least started... the second part of my followup
from the KVM Forum BoF on Rust in Qemu.

I've extended the page at https://wiki.qemu.org/RustInQemu with
information on Rust toolchain availability.  However, I found I had a
lot more open questions on this one, so there are quite a lot of gaps.

In particular:
 * I haven't so far figured out how to definitively check package
   information for RHEL & SLES (they're not covered by repology, and
   RHEL module structure confuses me, even as a RedHatter)
 * I'm not at all sure what criteria to use to consider something as
   having "good enough" rustup support, so that information is all
   blank so far
 * I've taken a bit of a stab in the dark about what Rust version is
   recent enough for our purposes (1.31.0).  I strongly suspect we're
   going to want to move that to something more recent, but I don't
   know what, which will mean revising a bunch of stuff
 * I'm not really convinced that the way I've formatted it is
   particularly good, but I haven't thought of a better alternative.

Contributions to improving this would be most welcomed.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH] virtio-net : Add check for VIRTIO_NET_F_MAC

2021-09-29 Thread Cindy Lu
On Wed, Sep 29, 2021 at 9:36 PM Michael S. Tsirkin  wrote:
>
> On Wed, Sep 29, 2021 at 08:08:40PM +0800, Cindy Lu wrote:
> > On Wed, Sep 29, 2021 at 6:07 PM Michael Tokarev  wrote:
> > >
> > > 29.09.2021 09:52, Cindy Lu wrote:
> > > > For vdpa device, if the host support VIRTIO_NET_F_MAC
> > > > we need to read the mac address from hardware, so need
> > > > to check this bit, the logic is
> > > > 1 if the host support VIRTIO_NET_F_MAC and the mac address
> > > > is correct, qemu will use the mac address in hardware
> > > > 2.if the host not support , qemu will use the mac from cmdline
> > >
> > > So if hw supports NET_F_MAC, cmdline-provided parameter will
> > > silently be ignored?
> > >
> > yes, this is based on the virtio spec, you can check this document in
> > 5.1.5 Device Initialization
> > https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html
>
> Maybe use the hw mac if mac is not provided? If provided
> make sure the command line matches the hardware, and fail
> otherwise?
>

So here we come to the final question: which mac address has the higher
priority? I think the NET_F_MAC bit means the hw mac address takes priority
over the command-line address. If the hw drivers want to change this, they
can simply remove this bit.


> > Also, this check it only working for vdpa device
> > > s/host not support/host does not support this feature/
> > Thanks , will fix this
> > >
> > > > 3.if the cmdline not provide mac address, qemu will use radam mac
> > > > address
> > >
> > > s/not/does not/
> > > s/radam/random/
> > >
> > thanks, will fix this
> > > Thanks,
> > >
> > > /mjt
> > >
>




Re: [PATCH v3 0/7] Reduce load on ppc target maintainers

2021-09-29 Thread David Gibson
On Mon, Sep 27, 2021 at 02:48:01PM +1000, David Gibson wrote:
> Greg Kurz and myself have been co-maintainers for the ppc and ppc64
> targets for some time now.  However, both our day job responsibilities
> and interests are leading us towards other areas, so we have less time
> to devote to this any more.
> 
> Therefore, here's a bunch of updates to MAINTAINERS, intended to
> reduce the load on us.  Mostly this is marking fairly obscure
> platforms as orphaned (if someone wants to take over maintainership,
> let me know ASAP).  Bigger changes may be coming, but we haven't
> decided exactly what that's going to look like yet.

Thanks for the feedback and acks on this.  I'm now merging v3 into
ppc-for-6.2 to go into a pull request shortly.

> 
> Changes since v2:
>  * Clarified overly broad TCG CPUs entry
> Changes since v1:
>  * Reworked how OpenPIC is listed
> 
> David Gibson (7):
>   qemu: Split machine_ppc.py acceptance tests
>   MAINTAINERS: Remove machine specific files from ppc TCG CPUs entry
>   MAINTAINERS: Remove David & Greg as reviewers for a number of boards
>   MAINTAINERS: Orphan obscure ppc platforms
>   MAINTAINERS: Remove David & Greg as reviewers/co-maintainers of
> powernv
>   MAINTAINERS: Add information for OpenPIC
>   MAINTAINERS: Demote sPAPR from "Supported" to "Maintained"
> 
>  MAINTAINERS  | 51 
>  tests/acceptance/machine_ppc.py  | 69 
>  tests/acceptance/ppc_mpc8544ds.py| 32 +
>  tests/acceptance/ppc_pseries.py  | 35 ++
>  tests/acceptance/ppc_virtex_ml507.py | 34 ++
>  5 files changed, 121 insertions(+), 100 deletions(-)
>  delete mode 100644 tests/acceptance/machine_ppc.py
>  create mode 100644 tests/acceptance/ppc_mpc8544ds.py
>  create mode 100644 tests/acceptance/ppc_pseries.py
>  create mode 100644 tests/acceptance/ppc_virtex_ml507.py
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH v12 00/16] machine: smp parsing fixes and improvement

2021-09-29 Thread wangyanan (Y)



On 2021/9/29 22:57, Paolo Bonzini wrote:

On 29/09/21 16:46, Markus Armbruster wrote:

Paolo Bonzini  writes:


On 29/09/21 04:58, Yanan Wang wrote:

Hi,
This is a new version (v12) with minor update suggested by Daniel
and Philippe. Two new commits (#1 and #16) are added. Thanks!


Queued, thanks!


Could you amend PATCH 16 to drop ERRP_GUARD() in machine_set_smp()?




Sure.



Ah, sorry, I missed that one line. Thank you for fixing it locally.

Yanan
.




Re: QAPI sync meeting

2021-09-29 Thread John Snow
On Wed, Sep 29, 2021 at 8:18 AM Markus Armbruster  wrote:

> John Snow  writes:
>
> > On Tue, Sep 28, 2021 at 9:53 AM Daniel P. Berrangé 
> > wrote:
> >
> >> On Mon, Sep 27, 2021 at 12:55:34PM -0400, John Snow wrote:
>
> [...]
>
> >> > - Any weekday after 13:00 UTC. Wednesdays, Thursdays and Fridays work
> >> > particularly well for me at the moment.
> >> > - bluejeans and google meeting both work well for me. Open to
> alternatives.
>
> Congratulations, you've just crowned yourself meeting organizer!  Pick a
> date, time, and place, then herd the cats there.
>
>
ACK. Will send out a proper invite.


> I can't do next Friday.  I think we should aim for next week, to give us
> time to coordinate and to prepare.
>
>
ACK


> >> Are you suggesting a 1-off meeting or a regular meeting, or an adhoc
> >> set of meetings ?
> >>
> >>
> > I'm proposing a one-off here just for some initial discussion and
> planning.
> > (We didn't have a QAPI BoF at KVM Forum and I'd like something similar
> > now.) I don't expect we'll really "finish" plotting out a roadmap in a
> > single 60 minute meeting, but it will probably give us good, targeted
> > discussions on the ML to follow. If more meetings are desired to discuss
> > specific topics, I'm not against scheduling them as-needed.
>
> Let's have one now, and then more as needed.
>
> Jumping into the meeting unprepared, then spending half of the alloted
> time listening to people explaining where they want to take QAPI seems
> like a bad use of our time.  Better: write it up and post it, so we can
> all read before we meet.  Do not reply, except perhaps to ask for
> clarification.  Discuss in the meeting.
>
> I can see such posts from Marc-Andre, Kevin, and Daniel.  Some of them
> could use a bit more detail, perhaps.
>
> Thoughts?
>
>
Works for me. I can send out a short summary beforehand, too, but I will
want to aggregate them in bullet-list digestible form for the meeting
minutes. I am suggesting a call to begin with so that I can set aside a
time specifically to discuss items with all interested parties ... at a
time when I'm awake. O:-)


> > (I personally don't really mind semi-regular sync-up meetings, like once
> > every other month or something to that effect -- it keeps me motivated
> and
> > on track. I suspect that Markus does not quite feel the same way about
> 'em,
> > so I'm just trying to not sign him up for stuff.)
>
> More regular meetings sit fairly low on my personal wish list.
>
>
O:-) O:-) O:-)


Re: [PATCH 0/1] hw: aspeed_gpio: Fix GPIO array indexing

2021-09-29 Thread Peter Delevoryas


> On Sep 27, 2021, at 8:43 PM, p...@fb.com wrote:
> 
> From: Peter Delevoryas 
> 
> Hey everyone,
> 
> I think there might be a bug in aspeed_gpio_update, where it's selecting
> a GPIO IRQ to update. The indexing that maps from GPIO pin to IRQ leads
> to an out-of-bounds array access and a segfault after that.
> 
> tl;dr
> 
> There's 8 rows of 32 pins (8 * 32 == 256 total) on the AST2500, but some
> of the pins are not actually active: there's only 228 pins actually
> active in the AST2500.
> 
> The GPIO IRQ array has length 228, but we index it using a matrix
> indexing scheme like [row][column], and end up out-of-bounds for
> high-numbered pins.
> 
> I fixed this by converting the IRQ array to a matrix, where some
> of the entries are uninitialized (zero). This retains the matrix
> indexing scheme, which I think is easy to understand.
> 
> Notes on reproducing:
> 
> I was testing booting Facebook's OpenBMC platform "YosemiteV2" (fby2)
> and hit a segfault:
> 
>  qemu-system-arm -machine ast2500-evb \
>  -drive file=fby2.mtd,format=raw,if=mtd \
>  -serial stdio -display none
>  ...
>  Setup Caching for Bridge IC info..done.
>  Setup Front Panel Daemon..done.
>  Setup fan speed...
>  FAN CONFIG : Single Rotor FAN
>  Unexpected 4 Servers config! Run FSC 4 TLs Config as default config
>  Setting Zone 0 speed to 70%
>  Setting Zone 1 speed to 70%
>  ok: run: fscd: (pid 1726) 0s
>  done.
>  Powering fru 1 to ON state...
>  Segmentation fault (core dumped)
> 
> In gdb:
> 
>  Thread 3 "qemu-system-arm" received signal SIGSEGV, Segmentation fault.
>  [Switching to Thread 0x720ee700 (LWP 1840353)]
>  qemu_set_irq (irq=0x, level=1) at ../hw/core/irq.c:45
>  45  irq->handler(irq->opaque, irq->n, level);
>  (gdb) p irq
>  $1 = (qemu_irq) 0x
>  (gdb) up
>  #1  0x558e36f5 in aspeed_gpio_update (s=0x77ecffb0, 
> regs=0x77ed0c94, value=128) at ../hw/gpio/aspeed_gpio.c:287
>  287 qemu_set_irq(s->gpios[offset], !!(new & mask));
>  (gdb) p s->gpios
>  $2 = {0x0 }
>  (gdb) p offset
>  $3 = 231
>  (gdb) p set
>  $5 = 7
>  (gdb) p gpio
>  $4 = 7
> 
> With my fix, I can boot the fby2 platform. The image I was using is here:
> 
> https://github.com/peterdelevoryas/openbmc/releases/tag/fby2.debug.mtd
> 
> Peter Delevoryas (1):
>  hw: aspeed_gpio: Fix GPIO array indexing
> 
> hw/gpio/aspeed_gpio.c | 72 ++-
> include/hw/gpio/aspeed_gpio.h |  5 +--
> 2 files changed, 31 insertions(+), 46 deletions(-)
> 
> -- 
> 2.30.2
> 

cc’ing Dan



Re: [PATCH 1/1] hw: aspeed_gpio: Fix pin I/O type declarations

2021-09-29 Thread Peter Delevoryas

> On Sep 28, 2021, at 3:53 AM, Damien Hedde  wrote:
> 
> 
> 
> On 9/28/21 05:24, p...@fb.com wrote:
>> From: Peter Delevoryas 
>> Some of the pin declarations in the Aspeed GPIO module were incorrect,
>> probably because of confusion over which bits in the input and output
>> uint32_t's correspond to which groups in the label array. Since the
>> uint32_t literals are in big endian, it's sort of the opposite of what
>> would be intuitive. The least significant bit in ast2500_set_props[6]
>> corresponds to GPIOY0, not GPIOAB7.
>> GPIOxx indicates input and output capabilities, GPIxx indicates only
>> input, GPOxx indicates only output.
>> AST2500:
>> - Previously had GPIW0..GPIW7 and GPIX0..GPIX7, that's correct.
>> - Previously had GPIOY0..GPIOY3, should have been GPIOY0..GPIOY7.
>> - Previously had GPIOAB0..GPIOAB3 and GPIAB4..GPIAB7, should only have
>>   been GPIOAB0..GPIOAB3.
>> AST2600:
>> - GPIOT0..GPIOT7 should have been GPIT0..GPIT7.
>> - GPIOU0..GPIOU7 should have been GPIU0..GPIU7.
>> - GPIW0..GPIW7 should have been GPIOW0..GPIOW7.
>> - GPIOY0..GPIOY7 and GPIOZ0...GPIOZ7 were disabled.
>> Fixes: 4b7f956862dc2db4c5c ("hw/gpio: Add basic Aspeed GPIO model for 
>> AST2400 and AST2500")
>> Fixes: 36d737ee82b2972167e ("hw/gpio: Add in AST2600 specific 
>> implementation")
>> Signed-off-by: Peter Delevoryas 
> 
> Reviewed-by: Damien Hedde 

cc’ing Dan

> 
>> ---
>>  hw/gpio/aspeed_gpio.c | 8 
>>  1 file changed, 4 insertions(+), 4 deletions(-)
>> diff --git a/hw/gpio/aspeed_gpio.c b/hw/gpio/aspeed_gpio.c
>> index dfa6d6cb40..33a40a624a 100644
>> --- a/hw/gpio/aspeed_gpio.c
>> +++ b/hw/gpio/aspeed_gpio.c
>> @@ -796,7 +796,7 @@ static const GPIOSetProperties ast2500_set_props[] = {
>>  [3] = {0x,  0x,  {"M", "N", "O", "P"} },
>>  [4] = {0x,  0x,  {"Q", "R", "S", "T"} },
>>  [5] = {0x,  0x,  {"U", "V", "W", "X"} },
>> -[6] = {0xff0f,  0x0f0f,  {"Y", "Z", "AA", "AB"} },
>> +[6] = {0x0fff,  0x0fff,  {"Y", "Z", "AA", "AB"} },
>>  [7] = {0x00ff,  0x00ff,  {"AC"} },
>>  };
>>  @@ -805,9 +805,9 @@ static GPIOSetProperties ast2600_3_3v_set_props[] = {
>>  [1] = {0x,  0x,  {"E", "F", "G", "H"} },
>>  [2] = {0x,  0x,  {"I", "J", "K", "L"} },
>>  [3] = {0x,  0x,  {"M", "N", "O", "P"} },
>> -[4] = {0x,  0x,  {"Q", "R", "S", "T"} },
>> -[5] = {0x,  0x,  {"U", "V", "W", "X"} },
>> -[6] = {0x,  0x0fff,  {"Y", "Z", "", ""} },
>> +[4] = {0x,  0x00ff,  {"Q", "R", "S", "T"} },
>> +[5] = {0x,  0xff00,  {"U", "V", "W", "X"} },
>> +[6] = {0x,  0x,  {"Y", "Z"} },
>>  };
>>static GPIOSetProperties ast2600_1_8v_set_props[] = {



[RFC PATCH 0/1] hw: aspeed_adc: Add initial Aspeed ADC support

2021-09-29 Thread pdel
From: Peter Delevoryas 

Hey everyone,

This patch mostly just does the basic, boilerplate setup for the ADC, so
that we can start adding more ADC feature emulation in the future.

The only device behavior that I tried to add was emulating the control
initialization sequence and the sequence for enabling "Auto compensating
sensing mode". I didn't even use something like a timer to delay the
response for each register write by a few cycles, it just immediately
updates the register.

I was looking at the Nuvoton ADC model, "hw/adc/npcm7xx_adc.c", and I
noticed that it has "enter_reset" and "hold_reset" methods, should I
implement the initialization sequence as part of the "Resettable"
interface? (It seems like I probably should, I'm just not really sure).

I could also add a timer to emulate the time it takes to initialize the
ADC, I see the Nuvoton module also has a constant
"NPCM7XX_ADC_RESET_CYCLES" but it's not used anywhere, but I imagine I
could do something similar to the conversion timer in that module?  I
think the upstream drivers poll every 0.5 ms and timeout after 500 ms,
so I could use that as a guide.

This patch, even without any conversion emulation, is useful to me
because some OpenBMC platforms use Aspeed SDK ADC drivers, and they
don't have error handling if the ADC doesn't respond, so they can't boot
in QEMU without some basic emulation like this.

Thanks,
Peter

Peter Delevoryas (1):
  hw: aspeed_adc: Add initial Aspeed ADC support

 hw/adc/aspeed_adc.c | 205 
 hw/adc/meson.build  |   1 +
 hw/adc/trace-events |   4 +
 hw/arm/aspeed_ast2600.c |  18 
 hw/arm/aspeed_soc.c |  17 +++
 include/hw/adc/aspeed_adc.h |  48 +
 include/hw/arm/aspeed_soc.h |   5 +
 7 files changed, 298 insertions(+)
 create mode 100644 hw/adc/aspeed_adc.c
 create mode 100644 include/hw/adc/aspeed_adc.h

-- 
2.30.2




[RFC PATCH 1/1] hw: aspeed_adc: Add initial Aspeed ADC support

2021-09-29 Thread pdel
From: Peter Delevoryas 

This change sets up Aspeed SoC ADC emulation, so that most ADC drivers
will pass the initialization sequence and load successfully. In the
future, we can extend this to emulate more features.

The initialization sequence is:

1. Set `ADC00` to `0xF`.
2. Wait for bit 8 of `ADC00` to be set.

I also added the sequence for enabling "Auto compensating sensing mode":

1. Set `ADC00` to `0x2F` (set bit 5).
2. Wait for bit 5 of `ADC00` to be reset (to zero).
3. ...
4. ...

Fuji (AST2600):
  Before:
[   56.185778] aspeed_adc: probe of 1e6e9000.adc failed with error -110
[   56.687936] aspeed_adc: probe of 1e6e9100.adc failed with error -110

  After:
aspeed_adc_read 0x0c read 0x
aspeed_adc_read 0x0c read 0x
aspeed_adc_write 0x00 write 0x000f
aspeed_adc_read 0x00 read 0x010f
aspeed_adc_read 0x00 read 0x010f
[   55.885164] aspeed_adc 1e6e9000.adc: trim 8
aspeed_adc_read 0xc4 read 0x
aspeed_adc_write 0xc4 write 0x0008
aspeed_adc_write 0x00 write 0x011f
aspeed_adc_write 0x00 write 0x1011f
aspeed_adc_read 0x10 read 0x
aspeed_adc_write 0x00 write 0x010f
[   55.886509] aspeed_adc 1e6e9000.adc: cv 512
aspeed_adc_write 0x00 write 0x010f
aspeed_adc_read 0x0c read 0x
aspeed_adc_read 0x0c read 0x
aspeed_adc_write 0x00 write 0x000f
aspeed_adc_read 0x00 read 0x010f
aspeed_adc_read 0x00 read 0x010f
[   55.890609] aspeed_adc 1e6e9100.adc: trim 8
aspeed_adc_read 0xc4 read 0x
aspeed_adc_write 0xc4 write 0x0008
aspeed_adc_write 0x00 write 0x011f
aspeed_adc_write 0x00 write 0x1011f
aspeed_adc_read 0x10 read 0x
aspeed_adc_write 0x00 write 0x010f
[   55.891863] aspeed_adc 1e6e9100.adc: cv 512
aspeed_adc_write 0x00 write 0x010f

YosemiteV2 (AST2500):
  Before:
[   20.561588] ast_adc ast_adc.0: ast_adc_probe
[   20.563741] hwmon hwmon0: write offset: c4, val: 8
[   20.563925] hwmon hwmon0: write offset: c, val: 40
[   20.564099] hwmon hwmon0: write offset: 0, val: f
[   21.066110] ast_adc: driver init failed (ret=-110)!
[   21.066635] ast_adc: probe of ast_adc.0 failed with error -110

  After:
aspeed_adc_write 0xc4 write 0x0008
aspeed_adc_write 0x0c write 0x0040
aspeed_adc_write 0x00 write 0x000f
aspeed_adc_read 0x00 read 0x010f
aspeed_adc_write 0x00 write 0x002f
aspeed_adc_read 0x00 read 0x000f
aspeed_adc_read 0xc4 read 0x0008
[   19.602033] ast_adc: driver successfully loaded.

Signed-off-by: Peter Delevoryas 
---
 hw/adc/aspeed_adc.c | 205 
 hw/adc/meson.build  |   1 +
 hw/adc/trace-events |   4 +
 hw/arm/aspeed_ast2600.c |  18 
 hw/arm/aspeed_soc.c |  17 +++
 include/hw/adc/aspeed_adc.h |  48 +
 include/hw/arm/aspeed_soc.h |   5 +
 7 files changed, 298 insertions(+)
 create mode 100644 hw/adc/aspeed_adc.c
 create mode 100644 include/hw/adc/aspeed_adc.h

diff --git a/hw/adc/aspeed_adc.c b/hw/adc/aspeed_adc.c
new file mode 100644
index 00..590936148b
--- /dev/null
+++ b/hw/adc/aspeed_adc.c
@@ -0,0 +1,205 @@
+/*
+ * Aspeed ADC Controller
+ *
+ * Copyright 2021 Facebook, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/adc/aspeed_adc.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+#include "trace.h"
+#include "qemu/log.h"
+
+#define TO_REG(offset) ((offset) >> 2)
+#define ENGINE_CONTROL TO_REG(0x00)
+
+static uint64_t aspeed_adc_read(void *opaque, hwaddr offset, unsigned size)
+{
+AspeedADCState *s = ASPEED_ADC(opaque);
+int reg = TO_REG(offset);
+
+if (reg >= ASPEED_ADC_MAX_REGS) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: Out-of-bounds read 0x%04" HWADDR_PRIX "\n",
+  __func__, offset);
+return 0;
+}
+
+int value = s->regs[reg];
+
+trace_aspeed_adc_read(offset, value);
+return value;
+}
+
+static void aspeed_adc_write(void *opaque, hwaddr offset, uint64_t data,
+ unsigned size)
+{
+AspeedADCState *s = ASPEED_ADC(opaque);
+int reg = TO_REG(offset);
+
+if (reg >= ASPEED_ADC_MAX_REGS) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: Out-of-bounds write 0x%04" HWADDR_PRIX "\n",
+  __func__, offset);
+return;
+}
+
+trace_aspeed_adc_write(offset, data);
+
+switch (reg) {
+case ENGI

Re: QAPI sync meeting

2021-09-29 Thread John Snow
On Wed, Sep 29, 2021 at 9:52 AM Damien Hedde 
wrote:

>
>
> On 9/27/21 18:55, John Snow wrote:
> > Hiya,
> >
> > I'd like to propose that at least the three of us arrange a time to have
> > a meeting where we discuss our plans and ideas for QAPI going forward,
> > including rust, python, and golang extensions to the QAPI generator,
> > what we hope to accomplish with those projects, and so on.
> >
> > What I am hoping to get out of this for myself is a high-level overview
> > of people's plans for QAPI and to produce some notes on those plans so
> > that I can have a reference that we've all acknowledged as roughly
> > accurate to be able to keep the community's design goals for QAPI in
> > mind as I continue my own development. Ultimately, I'd like some kind of
> > rough draft of a "QAPI roadmap".
> >
> > I know there was a rust meetup during KVM Forum, but I was unable to
> > attend due to the timing. I'd like to expand the focus a little more
> > broadly to QAPI in general and discuss our "personal" roadmaps, goals,
> > queued work, etc so that we can collaboratively formulate a broader
> > vision of our work.
> >
> > I'm posting to qemu-devel in case anyone else has an interest in this
> > area and would like to eavesdrop or share opinions, but we should
> > probably come up with an agenda first. So:
> >
>
> I would be interested too.
>
> My current topic of interest is somewhat distant (roughly: "remote"
> machine assembly) but I would like to better understand the QAPI roadmap.
>
>
The area that greensocs is working on is of direct interest to me as well:
I want 100% QEMU configuration via QMP.

(And then either I drop the command line, or re-architect the CLI such that
it is 100% a mapping onto equivalent QMP, or ... something. It's been a
somewhat hot topic at times, but 100% QMP configuration seems like where
the meat of the work is, anyway. Everything after that is kinda just
'details'.)


> --
> Damien
>
>


Re: [PATCH 0/4] qemu-img compare --stat

2021-09-29 Thread John Snow
On Wed, Sep 29, 2021 at 9:34 AM Vladimir Sementsov-Ogievskiy <
vsement...@virtuozzo.com> wrote:

> Hi all!
>
> Recently we faced the following task:
>
> A customer comes and says: incremental backup images are too fat. Does your
> incremental backup work correctly?
>
> What to answer? We should check something. At least check that
> incremental images don't store the same data twice. And we don't have a
> tool for it. I just wrote a simple Python script to compare raw files
> cluster-by-cluster. Then we mounted the qcow2 images with the help of
> qemu-nbd, compared the resulting /dev/nbd* devices, and proved that
> incremental backups don't store the same data.
>
>
Good idea. I love diagnostic tools!


> But that leads to the idea that some version of that script would be good
> to have at hand.
>
> So, here is a new option for qemu-img compare that is a lot more
> powerful and effective than the original script, and allows one to compare
> and calculate statistics, i.e. how many clusters differ, how many
> clusters changed from unallocated to data, and so on.
>
> For examples of output look at the test in patch 04.
>
> Vladimir Sementsov-Ogievskiy (4):
>   qemu-img: implement compare --stat
>   qemu-img: make --block-size optional for compare --stat
>   qemu-img: add --shallow option for qemu-img compare --stat
>   iotests: add qemu-img-compare-stat test
>
>  docs/tools/qemu-img.rst   |  29 +-
>  qemu-img.c| 275 +-
>  qemu-img-cmds.hx  |   4 +-
>  .../qemu-iotests/tests/qemu-img-compare-stat  |  88 ++
>

And new tests! :-)


>  .../tests/qemu-img-compare-stat.out   | 106 +++
>  5 files changed, 484 insertions(+), 18 deletions(-)
>  create mode 100755 tests/qemu-iotests/tests/qemu-img-compare-stat
>  create mode 100644 tests/qemu-iotests/tests/qemu-img-compare-stat.out
>
>
>


[Bug 1945540] Re: Java crashes on s390x VM with SIGILL/ILL_PRVOPC at '__kernel_getcpu+0x8'

2021-09-29 Thread John Neffenger
** Attachment added: "the log file resulting from 'java --version'"
   
https://bugs.launchpad.net/qemu/+bug/1945540/+attachment/5529211/+files/hs_err_pid6789.log

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1945540

Title:
  Java crashes on s390x VM with SIGILL/ILL_PRVOPC at
  '__kernel_getcpu+0x8'

Status in QEMU:
  New

Bug description:
  Host environment

  - Operating system: Ubuntu 20.04.3 LTS Desktop
  - OS/kernel version: Linux tower 5.11.0-37-generic #41~20.04.2-Ubuntu
  SMP Fri Sep 24 09:06:38 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
  - Architecture: amd64
  - QEMU flavor: qemu-system-s390x
  - QEMU version: QEMU emulator version 4.2.1 (Debian 1:4.2-3ubuntu6.17)
  - QEMU command line: See attached file 'command-line.txt'

  Emulated/Virtualized environment

  - Operating system: Ubuntu 20.04.3 LTS Server
  - OS/kernel version: Linux s390x-focal 5.4.0-88-generic #99-Ubuntu
  SMP Thu Sep 23 17:27:44 UTC 2021 s390x s390x s390x GNU/Linux
  - Architecture: s390x

  Description of problem

  Java crashes as shown below:

  $ java --version
  #
  # A fatal error has been detected by the Java Runtime Environment:
  #
  #  SIGILL (0x4) at pc=0x03ff9f5fe6f4, pid=6789, tid=6818
  #
  # JRE version:  (17.0+35) (build )
  # Java VM: OpenJDK 64-Bit Server VM (17+35-snap, mixed mode, sharing,
  # tiered, compressed oops, compressed class ptrs, g1 gc, linux-s390x)
  # Problematic frame:
  # C  [linux-vdso64.so.1+0x6f8]  __kernel_getcpu+0x8
  #
  # Core dump will be written. Default location: core.6789 (may not
  # exist)
  #
  # An error report file with more information is saved as:
  # /home/ubuntu/src/hs_err_pid6789.log
  #
  #
  Aborted (core dumped)

  Steps to reproduce

  Run any Java program to reproduce the problem.

  Because the 'openjdk' packages in Ubuntu run the 'java' command during
  installation, they hit the same error and fail to install. As an
  alternative, you can install the OpenJDK Snap package for the 's390x'
  architecture as follows:

$ sudo snap install openjdk

  The OpenJDK Snap package has been tested to work on a real IBM/S390
  8561 system, namely the IBM LinuxONE III LT1 at Marist College:

Marist College Installs World’s First IBM LinuxONE III™
https://www.marist.edu/-/marist-first-linuxone-iii

  Additional information

  See the following attached files:

  command-line.txt - the command-line used to start the virtual machine
  hs_err_pid6789.log - the log file resulting from 'java --version'

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1945540/+subscriptions




[Bug 1945540] [NEW] Java crashes on s390x VM with SIGILL/ILL_PRVOPC at '__kernel_getcpu+0x8'

2021-09-29 Thread John Neffenger
Public bug reported:

Host environment

- Operating system: Ubuntu 20.04.3 LTS Desktop
- OS/kernel version: Linux tower 5.11.0-37-generic #41~20.04.2-Ubuntu
SMP Fri Sep 24 09:06:38 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
- Architecture: amd64
- QEMU flavor: qemu-system-s390x
- QEMU version: QEMU emulator version 4.2.1 (Debian 1:4.2-3ubuntu6.17)
- QEMU command line: See attached file 'command-line.txt'

Emulated/Virtualized environment

- Operating system: Ubuntu 20.04.3 LTS Server
- OS/kernel version: Linux s390x-focal 5.4.0-88-generic #99-Ubuntu
SMP Thu Sep 23 17:27:44 UTC 2021 s390x s390x s390x GNU/Linux
- Architecture: s390x

Description of problem

Java crashes as shown below:

$ java --version
#
# A fatal error has been detected by the Java Runtime Environment:
#
#  SIGILL (0x4) at pc=0x03ff9f5fe6f4, pid=6789, tid=6818
#
# JRE version:  (17.0+35) (build )
# Java VM: OpenJDK 64-Bit Server VM (17+35-snap, mixed mode, sharing,
# tiered, compressed oops, compressed class ptrs, g1 gc, linux-s390x)
# Problematic frame:
# C  [linux-vdso64.so.1+0x6f8]  __kernel_getcpu+0x8
#
# Core dump will be written. Default location: core.6789 (may not
# exist)
#
# An error report file with more information is saved as:
# /home/ubuntu/src/hs_err_pid6789.log
#
#
Aborted (core dumped)

Steps to reproduce

Run any Java program to reproduce the problem.

Because the 'openjdk' packages in Ubuntu run the 'java' command during
installation, they hit the same error and fail to install. As an
alternative, you can install the OpenJDK Snap package for the 's390x'
architecture as follows:

  $ sudo snap install openjdk

The OpenJDK Snap package has been tested to work on a real IBM/S390 8561
system, namely the IBM LinuxONE III LT1 at Marist College:

  Marist College Installs World’s First IBM LinuxONE III™
  https://www.marist.edu/-/marist-first-linuxone-iii

Additional information

See the following attached files:

command-line.txt - the command-line used to start the virtual machine
hs_err_pid6789.log - the log file resulting from 'java --version'

** Affects: qemu
 Importance: Undecided
 Status: New

** Attachment added: "the command-line used to start the virtual machine"
   
https://bugs.launchpad.net/bugs/1945540/+attachment/5529210/+files/command-line.txt

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1945540

Title:
  Java crashes on s390x VM with SIGILL/ILL_PRVOPC at
  '__kernel_getcpu+0x8'

Status in QEMU:
  New

Bug description:
  Host environment

  - Operating system: Ubuntu 20.04.3 LTS Desktop
  - OS/kernel version: Linux tower 5.11.0-37-generic #41~20.04.2-Ubuntu
  SMP Fri Sep 24 09:06:38 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
  - Architecture: amd64
  - QEMU flavor: qemu-system-s390x
  - QEMU version: QEMU emulator version 4.2.1 (Debian 1:4.2-3ubuntu6.17)
  - QEMU command line: See attached file 'command-line.txt'

  Emulated/Virtualized environment

  - Operating system: Ubuntu 20.04.3 LTS Server
  - OS/kernel version: Linux s390x-focal 5.4.0-88-generic #99-Ubuntu
  SMP Thu Sep 23 17:27:44 UTC 2021 s390x s390x s390x GNU/Linux
  - Architecture: s390x

  Description of problem

  Java crashes as shown below:

  $ java --version
  #
  # A fatal error has been detected by the Java Runtime Environment:
  #
  #  SIGILL (0x4) at pc=0x03ff9f5fe6f4, pid=6789, tid=6818
  #
  # JRE version:  (17.0+35) (build )
  # Java VM: OpenJDK 64-Bit Server VM (17+35-snap, mixed mode, sharing,
  # tiered, compressed oops, compressed class ptrs, g1 gc, linux-s390x)
  # Problematic frame:
  # C  [linux-vdso64.so.1+0x6f8]  __kernel_getcpu+0x8
  #
  # Core dump will be written. Default location: core.6789 (may not
  # exist)
  #
  # An error report file with more information is saved as:
  # /home/ubuntu/src/hs_err_pid6789.log
  #
  #
  Aborted (core dumped)

  Steps to reproduce

  Run any Java program to reproduce the problem.

  Because the 'openjdk' packages in Ubuntu run the 'java' command during
  installation, they hit the same error and fail to install. As an
  alternative, you can install the OpenJDK Snap package for the 's390x'
  architecture as follows:

$ sudo snap install openjdk

  The OpenJDK Snap package has been tested to work on a real IBM/S390
  8561 system, namely the IBM LinuxONE III LT1 at Marist College:

Marist College Installs World’s First IBM LinuxONE III™
https://www.marist.edu/-/marist-first-linuxone-iii

  Additional information

  See the following attached files:

  command-line.txt - the command-line used to start the virtual machine
  hs_err_pid6789.log - the log file resulting from 'java --version'

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1945540/+subscriptions




Re: [PATCH v2 1/1] hw/riscv: shakti_c: Mark as not user creatable

2021-09-29 Thread Alistair Francis
On Wed, Sep 29, 2021 at 7:47 PM Bin Meng  wrote:
>
> On Wed, Sep 29, 2021 at 7:30 AM Alistair Francis
>  wrote:
> >
> > From: Alistair Francis 
> >
> > Mark the shakti_c machine as not user creatable.
> >
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/639
> > Signed-off-by: Alistair Francis 
> > ---
> >  hw/riscv/shakti_c.c | 6 ++
> >  1 file changed, 6 insertions(+)
> >
> > diff --git a/hw/riscv/shakti_c.c b/hw/riscv/shakti_c.c
> > index 2f084d3c8d..e7cd3bdfb4 100644
> > --- a/hw/riscv/shakti_c.c
> > +++ b/hw/riscv/shakti_c.c
> > @@ -150,6 +150,12 @@ static void shakti_c_soc_class_init(ObjectClass 
> > *klass, void *data)
> >  {
> >  DeviceClass *dc = DEVICE_CLASS(klass);
> >  dc->realize = shakti_c_soc_state_realize;
> > +/* Reasons:
>
> nits: the multi-line comment format should use:
>
> /*
>  *
>  */

Fixed when applying the patch

Applied to riscv-to-apply.next

Alistair

>
> > + * - Creates CPUS in riscv_hart_realize(), and can create 
> > unintended
> > + *   CPUs
> > + * - Uses serial_hds in realize function, thus can't be used twice
> > + */
> > +dc->user_creatable = false;
> >  }
> >
> >  static void shakti_c_soc_instance_init(Object *obj)
>
>
> Otherwise,
> Reviewed-by: Bin Meng 



RE: virtio-gpu: Get FD for texture

2021-09-29 Thread Kasireddy, Vivek
Hi Antonio,

> 
> I am starting to believe that the error is due to the fact that no EGLContext
> is active on the current thread (the one running the Vulkan application).
[Kasireddy, Vivek] Which UI module (and host environment) are you testing with?
gtk? egl-headless? Could you please provide more details about the environment
and the use-case?
> 
> Trying to call eglMakeCurrent within this thread gives me an EGL_BAD_ACCESS
> error, as the EGLContext associated with the GL texture belongs to a
> different thread.
[Kasireddy, Vivek] IIUC, contexts can only be bound to one thread at a time. So
you either need to release the context in the other thread
(eglMakeCurrent(NULL, NULL)) before making it current in your current thread,
or create a shared context between both threads to be able to share textures.

Thanks,
Vivek

> 
> Does that make sense?
> 
> Kind regards,
> Antonio Caggiano
> 
> On 27/09/21 12:21, Antonio Caggiano wrote:
> > Hi,
> >
> > I am trying to support a Vulkan application in the guest
> > (GTKGlArea+VirGL+venus) which needs to import a GL texture from a GL
> > context.
> >
> > Before doing that, I need to get a FD for that texture, therefore I
> > tried with calling egl-helpers.h:egl_get_fd_for_texture() but I get an
> > epoxy error:
> >
> >  > No provider of eglCreateImageKHR found.  Requires one of:
> >
> >  >   EGL_KHR_image
> >
> >  >   EGL_KHR_image_base
> >
> > This is a bit weird to me as I am sure I am running QEMU with iris and
> > according to eglinfo both of these extensions are available.
> >
> > Do you think my approach makes sense, or am I doing something wrong
> > somewhere?
> >
> >
> > Kind regards,
> > Antonio Caggiano


[PATCH qemu v2] QEMU coding style mandates spaces for indentation. This change replaces TABs in hw/ide/ and include/hw/ide.

2021-09-29 Thread ~farzon
From: Farzon Lotfi 

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/371

Signed-off-by: Farzon Lotfi 
---
 hw/ide/cmd646.c   |  28 +--
 hw/ide/core.c |  84 -
 hw/ide/microdrive.c   | 360 +++---
 include/hw/ide/internal.h | 248 +-
 4 files changed, 360 insertions(+), 360 deletions(-)

diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c
index c254631485..d58f1cce45 100644
--- a/hw/ide/cmd646.c
+++ b/hw/ide/cmd646.c
@@ -36,20 +36,20 @@
 #include "trace.h"
 
 /* CMD646 specific */
-#define CFR0x50
-#define   CFR_INTR_CH0 0x04
-#define CNTRL  0x51
-#define   CNTRL_EN_CH0 0x04
-#define   CNTRL_EN_CH1 0x08
-#define ARTTIM23   0x57
-#defineARTTIM23_INTR_CH1   0x10
-#define MRDMODE0x71
-#define   MRDMODE_INTR_CH0 0x04
-#define   MRDMODE_INTR_CH1 0x08
-#define   MRDMODE_BLK_CH0  0x10
-#define   MRDMODE_BLK_CH1  0x20
-#define UDIDETCR0  0x73
-#define UDIDETCR1  0x7B
+#define CFR 0x50
+#define   CFR_INTR_CH0  0x04
+#define CNTRL   0x51
+#define   CNTRL_EN_CH0  0x04
+#define   CNTRL_EN_CH1  0x08
+#define ARTTIM230x57
+#defineARTTIM23_INTR_CH10x10
+#define MRDMODE 0x71
+#define   MRDMODE_INTR_CH0  0x04
+#define   MRDMODE_INTR_CH1  0x08
+#define   MRDMODE_BLK_CH0   0x10
+#define   MRDMODE_BLK_CH1   0x20
+#define UDIDETCR0   0x73
+#define UDIDETCR1   0x7B
 
 static void cmd646_update_irq(PCIDevice *pd);
 
diff --git a/hw/ide/core.c b/hw/ide/core.c
index fd69ca3167..c2a2fab74c 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -311,52 +311,52 @@ static void ide_cfata_identify(IDEState *s)
 
 cur_sec = s->cylinders * s->heads * s->sectors;
 
-put_le16(p + 0, 0x848a);   /* CF Storage Card signature */
-put_le16(p + 1, s->cylinders); /* Default cylinders */
-put_le16(p + 3, s->heads); /* Default heads */
-put_le16(p + 6, s->sectors);   /* Default sectors per track */
+put_le16(p + 0, 0x848a);/* CF Storage Card signature */
+put_le16(p + 1, s->cylinders);  /* Default cylinders */
+put_le16(p + 3, s->heads);  /* Default heads */
+put_le16(p + 6, s->sectors);/* Default sectors per track */
 /* *(p + 7) := nb_sectors >> 16 -- see ide_cfata_identify_size */
 /* *(p + 8) := nb_sectors   -- see ide_cfata_identify_size */
 padstr((char *)(p + 10), s->drive_serial_str, 20); /* serial number */
-put_le16(p + 22, 0x0004);  /* ECC bytes */
-padstr((char *) (p + 23), s->version, 8);  /* Firmware Revision */
+put_le16(p + 22, 0x0004);   /* ECC bytes */
+padstr((char *) (p + 23), s->version, 8);   /* Firmware Revision */
 padstr((char *) (p + 27), s->drive_model_str, 40);/* Model number */
 #if MAX_MULT_SECTORS > 1
 put_le16(p + 47, 0x8000 | MAX_MULT_SECTORS);
 #else
 put_le16(p + 47, 0x);
 #endif
-put_le16(p + 49, 0x0f00);  /* Capabilities */
-put_le16(p + 51, 0x0002);  /* PIO cycle timing mode */
-put_le16(p + 52, 0x0001);  /* DMA cycle timing mode */
-put_le16(p + 53, 0x0003);  /* Translation params valid */
-put_le16(p + 54, s->cylinders);/* Current cylinders */
-put_le16(p + 55, s->heads);/* Current heads */
-put_le16(p + 56, s->sectors);  /* Current sectors */
-put_le16(p + 57, cur_sec); /* Current capacity */
-put_le16(p + 58, cur_sec >> 16);   /* Current capacity */
-if (s->mult_sectors)   /* Multiple sector setting */
+put_le16(p + 49, 0x0f00);   /* Capabilities */
+put_le16(p + 51, 0x0002);   /* PIO cycle timing mode */
+put_le16(p + 52, 0x0001);   /* DMA cycle timing mode */
+put_le16(p + 53, 0x0003);   /* Translation params valid */
+put_le16(p + 54, s->cylinders); /* Current cylinders */
+put_le16(p + 55, s->heads); /* Current heads */
+put_le16(p + 56, s->sectors);   /* Current sectors */
+put_le16(p + 57, cur_sec);  /* Current capacity */
+put_le16(p + 58, cur_sec >> 16);/* Current capacity */
+if (s->mult_sectors)/* Multiple sector setting */
 put_le16(p + 59, 0x100 | s->mult_sectors);
 /* *(p + 60) := nb_sectors   -- see ide_cfata_identify_size */
 /* *(p + 61) := nb_sectors >> 16 -- see ide_cfata_identify_size */
-put_le16(p + 63, 0x0203);  /* Multiword DMA capability */
-put_le16(p + 64, 0x0001);  /* Flow Control PIO support */
-put_le16(p + 65, 0x0096);  /* Min. Multiword DMA cycle */
-put_le16(p + 66, 0x0096);  /* Rec. Multiword DMA cycle */
-put_le16(p + 68, 0x00b4);  /* Min. PIO cycle time */
-put_le16(p + 82, 0x400c);

[PATCH qemu v2] QEMU coding style mandates spaces for indentation. This change replaces TABs in hw/block/ and include/hw/block.

2021-09-29 Thread ~farzon
From: Farzon Lotfi 

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/371

Signed-off-by: Farzon Lotfi 
---
 hw/block/fdc.c   |   4 +-
 hw/block/nand.c  | 212 +++
 hw/block/onenand.c   | 126 +++
 hw/block/tc58128.c   | 136 -
 include/hw/block/flash.h |  20 ++--
 5 files changed, 249 insertions(+), 249 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 9014cd30b3..347343af35 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -595,8 +595,8 @@ enum {
 };
 
 enum {
-FD_STATE_MULTI  = 0x01,/* multi track flag */
-FD_STATE_FORMAT = 0x02,/* format flag */
+FD_STATE_MULTI  = 0x01, /* multi track flag */
+FD_STATE_FORMAT = 0x02, /* format flag */
 };
 
 enum {
diff --git a/hw/block/nand.c b/hw/block/nand.c
index 8bc80e3514..781a27d2e1 100644
--- a/hw/block/nand.c
+++ b/hw/block/nand.c
@@ -30,33 +30,33 @@
 #include "qemu/module.h"
 #include "qom/object.h"
 
-# define NAND_CMD_READ00x00
-# define NAND_CMD_READ10x01
-# define NAND_CMD_READ20x50
-# define NAND_CMD_LPREAD2  0x30
-# define NAND_CMD_NOSERIALREAD20x35
-# define NAND_CMD_RANDOMREAD1  0x05
-# define NAND_CMD_RANDOMREAD2  0xe0
-# define NAND_CMD_READID   0x90
-# define NAND_CMD_RESET0xff
-# define NAND_CMD_PAGEPROGRAM1 0x80
-# define NAND_CMD_PAGEPROGRAM2 0x10
-# define NAND_CMD_CACHEPROGRAM20x15
-# define NAND_CMD_BLOCKERASE1  0x60
-# define NAND_CMD_BLOCKERASE2  0xd0
-# define NAND_CMD_READSTATUS   0x70
-# define NAND_CMD_COPYBACKPRG1 0x85
-
-# define NAND_IOSTATUS_ERROR   (1 << 0)
-# define NAND_IOSTATUS_PLANE0  (1 << 1)
-# define NAND_IOSTATUS_PLANE1  (1 << 2)
-# define NAND_IOSTATUS_PLANE2  (1 << 3)
-# define NAND_IOSTATUS_PLANE3  (1 << 4)
+# define NAND_CMD_READ0 0x00
+# define NAND_CMD_READ1 0x01
+# define NAND_CMD_READ2 0x50
+# define NAND_CMD_LPREAD2   0x30
+# define NAND_CMD_NOSERIALREAD2 0x35
+# define NAND_CMD_RANDOMREAD1   0x05
+# define NAND_CMD_RANDOMREAD2   0xe0
+# define NAND_CMD_READID0x90
+# define NAND_CMD_RESET 0xff
+# define NAND_CMD_PAGEPROGRAM1  0x80
+# define NAND_CMD_PAGEPROGRAM2  0x10
+# define NAND_CMD_CACHEPROGRAM2 0x15
+# define NAND_CMD_BLOCKERASE1   0x60
+# define NAND_CMD_BLOCKERASE2   0xd0
+# define NAND_CMD_READSTATUS0x70
+# define NAND_CMD_COPYBACKPRG1  0x85
+
+# define NAND_IOSTATUS_ERROR(1 << 0)
+# define NAND_IOSTATUS_PLANE0   (1 << 1)
+# define NAND_IOSTATUS_PLANE1   (1 << 2)
+# define NAND_IOSTATUS_PLANE2   (1 << 3)
+# define NAND_IOSTATUS_PLANE3   (1 << 4)
 # define NAND_IOSTATUS_READY(1 << 6)
-# define NAND_IOSTATUS_UNPROTCT(1 << 7)
+# define NAND_IOSTATUS_UNPROTCT (1 << 7)
 
-# define MAX_PAGE  0x800
-# define MAX_OOB   0x40
+# define MAX_PAGE   0x800
+# define MAX_OOB0x40
 
 typedef struct NANDFlashState NANDFlashState;
 struct NANDFlashState {
@@ -102,40 +102,40 @@ static void mem_and(uint8_t *dest, const uint8_t *src, 
size_t n)
 }
 }
 
-# define NAND_NO_AUTOINCR  0x0001
-# define NAND_BUSWIDTH_16  0x0002
-# define NAND_NO_PADDING   0x0004
-# define NAND_CACHEPRG 0x0008
-# define NAND_COPYBACK 0x0010
-# define NAND_IS_AND   0x0020
-# define NAND_4PAGE_ARRAY  0x0040
-# define NAND_NO_READRDY   0x0100
-# define NAND_SAMSUNG_LP   (NAND_NO_PADDING | NAND_COPYBACK)
+# define NAND_NO_AUTOINCR   0x0001
+# define NAND_BUSWIDTH_16   0x0002
+# define NAND_NO_PADDING0x0004
+# define NAND_CACHEPRG  0x0008
+# define NAND_COPYBACK  0x0010
+# define NAND_IS_AND0x0020
+# define NAND_4PAGE_ARRAY   0x0040
+# define NAND_NO_READRDY0x0100
+# define NAND_SAMSUNG_LP(NAND_NO_PADDING | NAND_COPYBACK)
 
 # define NAND_IO
 
-# define PAGE(addr)((addr) >> ADDR_SHIFT)
+# define PAGE(addr) ((addr) >> ADDR_SHIFT)
 # define PAGE_START(page)   (PAGE(page) * (NAND_PAGE_SIZE + OOB_SIZE))
-# define PAGE_MASK ((1 << ADDR_SHIFT) - 1)
-# define OOB_SHIFT (PAGE_SHIFT - 5)
-# define OOB_SIZE  (1 << OOB_SHIFT)
-# define SECTOR(addr)  ((addr) >> (9 + ADDR_SHIFT - PAGE_SHIFT))
-# define SECTOR_OFFSET(addr)   ((addr) & ((511 >> PAGE_SHIFT) << 8))
+# define PAGE_MASK  ((1 << ADDR_SHIFT) - 1)
+# define OOB_SHIFT  (PAGE_SHIFT - 5)
+# define OOB_SIZE   (1 << OOB_SHIFT)
+# define SECTOR(addr)   ((addr) >> (9 + ADDR_SHIFT - PAGE_SHIFT))
+# define SECTOR_OFFSET(addr)((addr) & ((511 >> PAGE_SHIFT) << 8))
 
 # define NAND_PAGE_SIZE 256
-# define PAGE_SHIFT8
-# define PAGE_SECTORS  1
-# define ADDR_SHIFT8
+# define PAGE_SHIFT 8
+# define PAGE_SECTORS   1
+# define ADDR_SHIFT 8
 # include "nand.c"
 # define NAND_PAGE_SIZE 512
-# define PAGE_SHIFT9
-# define PAGE_SE

[PATCH qemu v2] QEMU coding style mandates spaces for indentation. This change replaces TABs in block files.

2021-09-29 Thread ~farzon
From: Farzon Lotfi 

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/371

Signed-off-by: Farzon Lotfi 
---
 block/bochs.c   | 10 +-
 block/file-posix.c  |  8 
 block/file-win32.c  | 20 ++--
 block/parallels.c   | 10 +-
 block/qcow.c| 10 +-
 include/block/nbd.h |  2 +-
 6 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/block/bochs.c b/block/bochs.c
index 2f010ab40a..01b84625c0 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -293,14 +293,14 @@ static void bochs_close(BlockDriverState *bs)
 }
 
 static BlockDriver bdrv_bochs = {
-.format_name   = "bochs",
-.instance_size = sizeof(BDRVBochsState),
-.bdrv_probe= bochs_probe,
-.bdrv_open = bochs_open,
+.format_name= "bochs",
+.instance_size  = sizeof(BDRVBochsState),
+.bdrv_probe = bochs_probe,
+.bdrv_open  = bochs_open,
 .bdrv_child_perm = bdrv_default_perms,
 .bdrv_refresh_limits = bochs_refresh_limits,
 .bdrv_co_preadv = bochs_co_preadv,
-.bdrv_close= bochs_close,
+.bdrv_close = bochs_close,
 .is_format  = true,
 };
 
diff --git a/block/file-posix.c b/block/file-posix.c
index d81e15efa4..9fc065506d 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -127,7 +127,7 @@
 #define FTYPE_FILE   0
 #define FTYPE_CD 1
 
-#define MAX_BLOCKSIZE  4096
+#define MAX_BLOCKSIZE   4096
 
 /* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes,
  * leaving a few more bytes for its future use. */
@@ -3647,7 +3647,7 @@ static BlockDriver bdrv_host_device = {
 .bdrv_attach_aio_context = raw_aio_attach_aio_context,
 
 .bdrv_co_truncate   = raw_co_truncate,
-.bdrv_getlength= raw_getlength,
+.bdrv_getlength = raw_getlength,
 .bdrv_get_info = raw_get_info,
 .bdrv_get_allocated_file_size
 = raw_get_allocated_file_size,
@@ -3750,7 +3750,7 @@ static BlockDriver bdrv_host_cdrom = {
 .protocol_name  = "host_cdrom",
 .instance_size  = sizeof(BDRVRawState),
 .bdrv_needs_filename = true,
-.bdrv_probe_device = cdrom_probe_device,
+.bdrv_probe_device  = cdrom_probe_device,
 .bdrv_parse_filename = cdrom_parse_filename,
 .bdrv_file_open = cdrom_open,
 .bdrv_close = raw_close,
@@ -3881,7 +3881,7 @@ static BlockDriver bdrv_host_cdrom = {
 .protocol_name  = "host_cdrom",
 .instance_size  = sizeof(BDRVRawState),
 .bdrv_needs_filename = true,
-.bdrv_probe_device = cdrom_probe_device,
+.bdrv_probe_device  = cdrom_probe_device,
 .bdrv_parse_filename = cdrom_parse_filename,
 .bdrv_file_open = cdrom_open,
 .bdrv_close = raw_close,
diff --git a/block/file-win32.c b/block/file-win32.c
index b97c58d642..f80e62faf1 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -743,9 +743,9 @@ static QemuOptsList raw_create_opts = {
 };
 
 BlockDriver bdrv_file = {
-.format_name   = "file",
-.protocol_name = "file",
-.instance_size = sizeof(BDRVRawState),
+.format_name= "file",
+.protocol_name  = "file",
+.instance_size  = sizeof(BDRVRawState),
 .bdrv_needs_filename = true,
 .bdrv_parse_filename = raw_parse_filename,
 .bdrv_file_open = raw_open,
@@ -763,7 +763,7 @@ BlockDriver bdrv_file = {
 .bdrv_aio_flush = raw_aio_flush,
 
 .bdrv_co_truncate   = raw_co_truncate,
-.bdrv_getlength= raw_getlength,
+.bdrv_getlength = raw_getlength,
 .bdrv_get_allocated_file_size
 = raw_get_allocated_file_size,
 
@@ -915,14 +915,14 @@ done:
 }
 
 static BlockDriver bdrv_host_device = {
-.format_name   = "host_device",
-.protocol_name = "host_device",
-.instance_size = sizeof(BDRVRawState),
+.format_name= "host_device",
+.protocol_name  = "host_device",
+.instance_size  = sizeof(BDRVRawState),
 .bdrv_needs_filename = true,
 .bdrv_parse_filename = hdev_parse_filename,
-.bdrv_probe_device = hdev_probe_device,
-.bdrv_file_open= hdev_open,
-.bdrv_close= raw_close,
+.bdrv_probe_device  = hdev_probe_device,
+.bdrv_file_open = hdev_open,
+.bdrv_close = raw_close,
 .bdrv_refresh_limits = hdev_refresh_limits,
 
 .bdrv_aio_preadv= raw_aio_preadv,
diff --git a/block/parallels.c b/block/parallels.c
index 6ebad2a2bb..629d8aae2b 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -916,11 +916,11 @@ static void parallels_close(BlockDriverState *bs)
 }
 
 static BlockDriver bdrv_parallels = {
-.format_name   = "parallels",
-.instance_size = sizeof(BDRVParallelsState),
-.bdrv_probe= parallels_probe,
-.bdrv_open = parallels_open,
-.bdrv_close= parallels_close,
+.format_name= "parallels",
+.instance_size  = sizeof(BDRVParallelsState),
+.bdrv_pro

Re: [PATCH v3] target/i386: Include 'hw/i386/apic.h' locally

2021-09-29 Thread Michael S. Tsirkin
On Wed, Sep 29, 2021 at 06:31:24PM +0200, Philippe Mathieu-Daudé wrote:
> Instead of including a sysemu-specific header in "cpu.h"
> (which is shared with user-mode emulations), include it
> locally when required.
> 
> Acked-by: Paolo Bonzini 
> Signed-off-by: Philippe Mathieu-Daudé 

Acked-by: Michael S. Tsirkin 

> ---
>  target/i386/cpu.h| 4 
>  hw/i386/kvmvapic.c   | 1 +
>  hw/i386/x86.c| 1 +
>  target/i386/cpu-dump.c   | 1 +
>  target/i386/cpu-sysemu.c | 1 +
>  target/i386/cpu.c| 1 +
>  target/i386/gdbstub.c| 4 
>  target/i386/hax/hax-all.c| 1 +
>  target/i386/helper.c | 1 +
>  target/i386/hvf/hvf.c| 1 +
>  target/i386/hvf/x86_emu.c| 1 +
>  target/i386/nvmm/nvmm-all.c  | 1 +
>  target/i386/tcg/sysemu/misc_helper.c | 1 +
>  target/i386/tcg/sysemu/seg_helper.c  | 1 +
>  target/i386/whpx/whpx-all.c  | 1 +
>  15 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index c2954c71ea0..4411718bb7a 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -2045,10 +2045,6 @@ typedef X86CPU ArchCPU;
>  #include "exec/cpu-all.h"
>  #include "svm.h"
>  
> -#if !defined(CONFIG_USER_ONLY)
> -#include "hw/i386/apic.h"
> -#endif
> -
>  static inline void cpu_get_tb_cpu_state(CPUX86State *env, target_ulong *pc,
>  target_ulong *cs_base, uint32_t 
> *flags)
>  {
> diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c
> index 43f8a8f679e..7333818bdd1 100644
> --- a/hw/i386/kvmvapic.c
> +++ b/hw/i386/kvmvapic.c
> @@ -16,6 +16,7 @@
>  #include "sysemu/hw_accel.h"
>  #include "sysemu/kvm.h"
>  #include "sysemu/runstate.h"
> +#include "hw/i386/apic.h"
>  #include "hw/i386/apic_internal.h"
>  #include "hw/sysbus.h"
>  #include "hw/boards.h"
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index 00448ed55aa..e0218f8791f 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -43,6 +43,7 @@
>  #include "target/i386/cpu.h"
>  #include "hw/i386/topology.h"
>  #include "hw/i386/fw_cfg.h"
> +#include "hw/i386/apic.h"
>  #include "hw/intc/i8259.h"
>  #include "hw/rtc/mc146818rtc.h"
>  
> diff --git a/target/i386/cpu-dump.c b/target/i386/cpu-dump.c
> index 02b635a52cf..0158fd2bf28 100644
> --- a/target/i386/cpu-dump.c
> +++ b/target/i386/cpu-dump.c
> @@ -22,6 +22,7 @@
>  #include "qemu/qemu-print.h"
>  #ifndef CONFIG_USER_ONLY
>  #include "hw/i386/apic_internal.h"
> +#include "hw/i386/apic.h"
>  #endif
>  
>  /***/
> diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c
> index 37b7c562f53..4e8a6973d08 100644
> --- a/target/i386/cpu-sysemu.c
> +++ b/target/i386/cpu-sysemu.c
> @@ -30,6 +30,7 @@
>  #include "hw/qdev-properties.h"
>  
>  #include "exec/address-spaces.h"
> +#include "hw/i386/apic.h"
>  #include "hw/i386/apic_internal.h"
>  
>  #include "cpu-internal.h"
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 6b029f1bdf1..52422cbf21b 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -33,6 +33,7 @@
>  #include "standard-headers/asm-x86/kvm_para.h"
>  #include "hw/qdev-properties.h"
>  #include "hw/i386/topology.h"
> +#include "hw/i386/apic.h"
>  #ifndef CONFIG_USER_ONLY
>  #include "exec/address-spaces.h"
>  #include "hw/boards.h"
> diff --git a/target/i386/gdbstub.c b/target/i386/gdbstub.c
> index 098a2ad15a9..5438229c1a9 100644
> --- a/target/i386/gdbstub.c
> +++ b/target/i386/gdbstub.c
> @@ -21,6 +21,10 @@
>  #include "cpu.h"
>  #include "exec/gdbstub.h"
>  
> +#ifndef CONFIG_USER_ONLY
> +#include "hw/i386/apic.h"
> +#endif
> +
>  #ifdef TARGET_X86_64
>  static const int gpr_map[16] = {
>  R_EAX, R_EBX, R_ECX, R_EDX, R_ESI, R_EDI, R_EBP, R_ESP,
> diff --git a/target/i386/hax/hax-all.c b/target/i386/hax/hax-all.c
> index bf65ed6fa92..cd89e3233a9 100644
> --- a/target/i386/hax/hax-all.c
> +++ b/target/i386/hax/hax-all.c
> @@ -32,6 +32,7 @@
>  #include "sysemu/reset.h"
>  #include "sysemu/runstate.h"
>  #include "hw/boards.h"
> +#include "hw/i386/apic.h"
>  
>  #include "hax-accel-ops.h"
>  
> diff --git a/target/i386/helper.c b/target/i386/helper.c
> index 533b29cb91b..874beda98ae 100644
> --- a/target/i386/helper.c
> +++ b/target/i386/helper.c
> @@ -26,6 +26,7 @@
>  #ifndef CONFIG_USER_ONLY
>  #include "sysemu/hw_accel.h"
>  #include "monitor/monitor.h"
> +#include "hw/i386/apic.h"
>  #endif
>  
>  void cpu_sync_bndcs_hflags(CPUX86State *env)
> diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
> index 4ba6e82fab3..50058a24f2a 100644
> --- a/target/i386/hvf/hvf.c
> +++ b/target/i386/hvf/hvf.c
> @@ -70,6 +70,7 @@
>  #include 
>  
>  #include "hw/i386/apic_internal.h"
> +#include "hw/i386/apic.h"
>  #include "qemu/main-loop.h"
>  #include "qemu/accel.h"
>  #include "target/i386/cpu.h"
> diff --git a/target/i386/hvf/x86_emu.c b/target/i

Re: [PULL 1/5] hw/pcie-root-port: Fix hotplug for PCI devices requiring IO

2021-09-29 Thread Michael S. Tsirkin
On Wed, Sep 29, 2021 at 04:41:49PM +0300, Marcel Apfelbaum wrote:
> Hi Michael,
> 
> On Mon, Sep 27, 2021 at 12:49 PM Michael S. Tsirkin  wrote:
> 
> On Mon, Sep 27, 2021 at 10:33:42AM +0100, Daniel P. Berrangé wrote:
> > On Tue, Aug 03, 2021 at 04:52:03PM -0400, Michael S. Tsirkin wrote:
> > > From: Marcel Apfelbaum 
> > >
> > > Q35 has now ACPI hotplug enabled by default for PCI(e) devices.
> > > As opposed to native PCIe hotplug, guests like Fedora 34
> > > will not assign IO range to pcie-root-ports not supporting
> > > native hotplug, resulting in a regression.
> > >
> > > Reproduce by:
> > >     qemu-bin -M q35 -device pcie-root-port,id=p1 -monitor stdio
> > >     device_add e1000,bus=p1
> > > In the Guest OS the respective pcie-root-port will have the IO range
> > > disabled.
> > >
> > > Fix it by setting the "reserve-io" hint capability of the
> > > pcie-root-ports so the firmware will allocate the IO range instead.
> > >
> > > Acked-by: Igor Mammedov 
> > > Signed-off-by: Marcel Apfelbaum 
> > > Message-Id: <20210802090057.1709775-1-mar...@redhat.com>
> > > Reviewed-by: Michael S. Tsirkin 
> > > Signed-off-by: Michael S. Tsirkin 
> > > ---
> > >  hw/pci-bridge/gen_pcie_root_port.c | 5 +
> > >  1 file changed, 5 insertions(+)
> >
> > This change, when combined with the switch to ACPI based hotplug by
> > default, is responsible for a significant regression in QEMU 6.1.0
> >
> > It is no longer possible to have more than 15 pcie-root-port devices
> > added to a q35 VM in 6.1.0.  Before this I've had as many as 80+ devices
> > present before I stopped trying to add more.
> >
> >   https://gitlab.com/qemu-project/qemu/-/issues/641
> >
> > This regression is significant, because it has broken the out of the
> > box default configuration that OpenStack uses for booting all VMs.
> > They add 16 pcie-root-ports by default to allow empty slots for device
> > hotplug under q35 [1].
> 
> 
> Indeed, oops. Thanks for the report!
> 
> Going back and looking at seabios code, didn't we get confused?
> Shouldn't we have reserved memory and not IO?
> 
> 
> We need the IO space for the legacy PCI bridges, otherwise an empty PCI bridge
> will become unusable.

Maybe we should go back to using OSC then ... the issue
is we can't then mix acpi and native hotplug for bridges.


> 
> 
> I see:
>             int resource_optional = pcie_cap && (type ==
> PCI_REGION_TYPE_IO);
>             if (!sum && hotplug_support && !resource_optional)
>                 sum = align; /* reserve min size for hot-plug */
> 
> 
> generally maybe we should just add an ACPI-hotplug capability and
> teach seabios about it?
> 
> 
> I suppose it is possible.
> 
> Thanks,
> Marcel
>  
> 
> 
> Marcel?
> 
> > > diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/
> gen_pcie_root_port.c
> > > index ec9907917e..20099a8ae3 100644
> > > --- a/hw/pci-bridge/gen_pcie_root_port.c
> > > +++ b/hw/pci-bridge/gen_pcie_root_port.c
> > > @@ -28,6 +28,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(GenPCIERootPort,
> GEN_PCIE_ROOT_PORT)
> > >          (GEN_PCIE_ROOT_PORT_AER_OFFSET + PCI_ERR_SIZEOF)
> > > 
> > >  #define GEN_PCIE_ROOT_PORT_MSIX_NR_VECTOR       1
> > > +#define GEN_PCIE_ROOT_DEFAULT_IO_RANGE          4096
> > > 
> > >  struct GenPCIERootPort {
> > >      /*< private >*/
> > > @@ -75,6 +76,7 @@ static bool gen_rp_test_migrate_msix(void *opaque,
> int version_id)
> > >  static void gen_rp_realize(DeviceState *dev, Error **errp)
> > >  {
> > >      PCIDevice *d = PCI_DEVICE(dev);
> > > +    PCIESlot *s = PCIE_SLOT(d);
> > >      GenPCIERootPort *grp = GEN_PCIE_ROOT_PORT(d);
> > >      PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(d);
> > >      Error *local_err = NULL;
> > > @@ -85,6 +87,9 @@ static void gen_rp_realize(DeviceState *dev, Error
> **errp)
> > >          return;
> > >      }
> > > 
> > > +    if (grp->res_reserve.io == -1 && s->hotplug && 
> !s->native_hotplug)
> {
> > > +        grp->res_reserve.io = GEN_PCIE_ROOT_DEFAULT_IO_RANGE;
> > > +    }
> > >      int rc = pci_bridge_qemu_reserve_cap_init(d, 0,
> > >                                                grp->res_reserve, 
> errp);
> > > 
> > > --
> > > MST
> > >
> > >
> >
> > Regards,
> > Daniel
> >
> > [1] https://github.com/openstack/tripleo-heat-templates/blob/
> 7a6cd0640ec390a330f5699d8ed60f71b2a9f514/deployment/nova/
> nova-compute-container-puppet.yaml#L462-L472
> > --
> > |: https://berrange.com      -o-    https://www.flickr.com/photos/
> dberrange :|
> > |: https://libvirt.org         -o-            https://
> fstop138.berrange.com :|
> > |: https://entangle-photo.o

Re: [RFC 0/1] hw/ipmi: add an aspeed IPMI iBT device model

2021-09-29 Thread Titus Rwantare
On Tue, 28 Sept 2021 at 23:24, Cédric Le Goater  wrote:
>
> Hello Titus,
>
> On 9/29/21 00:39, Titus Rwantare wrote:
> > This patch follows the Handing IPMI for emulating BMC patch set by Hao Wu.
> > Building on top of the work in [PATCH] hw/misc: Add an iBT device model 
> > posted
> > by Cédric Le Goater, this iBT model works as a backend to ipmi-host-extern.
>
> Could you please resend with my patch which is still available here :
>
>   
> https://github.com/legoater/qemu/commit/c6b679690f32534e8992d96752d90d2c4aa48130
>
> or here :
>
>   
> http://patchwork.ozlabs.org/project/qemu-devel/patch/20210407171637.43-20-...@kaod.org/
>
> and in another patch, the modifications you made on top of mine. That
> will help understand where the problem could be.
>
> You can modify patches but you need to keep the original author name,
> commit log, signed-off-by to certify its origin. Then list the changes
> you have made and add your signed-off-by.
>
> The patch you sent seems to be mostly mine and I don't see any of the
> above.
>
> Here is some more info in the kernel documentation :
>
>
> https://www.kernel.org/doc/html/v4.12/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin
>
> I don't think we have the same kind of documentation in QEMU but it's
> the same process.

Thanks, I'm still learning.

>
> > Needed to run:
> > - [PATCH 4/8] hw/ipmi: Refactor IPMI interface, Hao Wu
> > - [PATCH 5/8] hw/ipmi: Take out common from ipmi_bmc_extern.c, Hao Wu
> > - [PATCH 6/8] hw/ipmi: Move handle_command to IPMICoreClass, Hao Wu
> > - [PATCH 7/8] hw/ipmi: Add an IPMI external host device, Hao Wu
>
> I have no idea what these patches do and where they are. They seem to
> modify the core IPMI framework and it is important to understand the
> overall changes. Please include them in the patchset first since they
> are initial requirements.
>

I've pushed everything here: https://github.com/Rwantare/qemu/tree/aspeed-ibt
The other patches are out for review and will likely change before merging

> > Host commandline
> >  -chardev socket,id=ipmichr1,host=localhost,port=,reconnect=10 \
> >  -device ipmi-bmc-extern,chardev=ipmichr1,id=bmc0 \
> >  -device isa-ipmi-bt,bmc=bmc0,irq=10 -nodefaults
> >
> > BMC commandline
> >  -chardev 
> > socket,id=ipmichr1,host=localhost,port=,server=on,wait=off \
> >  -device "ipmi-host-extern,chardev=ipmichr1,responder=/machine/soc/ibt"
>
> Could we have the full command lines also ?
>

Sure, there's not much else there, one of the bmc firmwares I'm testing with
is a quanta-q71l-bmc which should build from OpenBMC.

Host:
qemu-system-x86_64 \
-nographic -m 4G \
-kernel arch/x86/boot/bzImage -append "console=ttyS0 earlyprintk=ttyS0" \
-serial /dev/tty -monitor /dev/pts/9 -initrd u-root.cpio \
-chardev socket,id=ipmichr0,host=localhost,port=,reconnect=10 \
-device ipmi-bmc-extern,chardev=ipmichr0,id=bmc0 \
-device isa-ipmi-bt,bmc=bmc0,irq=10 -nodefaults

BMC:
qemu-system-arm \
-machine quanta-q71l-bmc \
-nographic \
-drive file=path/to/openbmc/image,if=mtd,bus=0,unit=0,snapshot=on \
-chardev socket,id=ipmichr1,host=localhost,port=,server=on,wait=off \
-device "ipmi-host-extern,chardev=ipmichr1,responder=/machine/soc/ibt"

>
> > Currently, IRQs are not functional, but it is able to connect.
> > The following printout is from the host:
> >
> > [6.869742] ipmi_si IPI0001:00: IPMI message handler: Found new BMC 
> > (man_id:
> >   0x002b79, prod_id: 0x, dev_id: 0x00)
> > [   12.393281] ipmi_si IPI0001:00: IPMI bt interface initialized
> >
> > This patch is an RFC because of the missing IRQs and the need for other 
> > patches
> > to get merged.
>
> We will need the whole project to start reviewing.
>
> Thanks,
>
> C.

Noted, I think I should be able to send the BT commits from
https://github.com/Rwantare/qemu/tree/aspeed-ibt
after the others are reviewed and merged.
For now, the irq setup is my point of confusion.

Thanks,
-Titus



[PULL v2 00/19] NBD patches through 2021-09-27

2021-09-29 Thread Eric Blake
The following changes since commit 6b54a31bf7b403672a798b6443b1930ae6c74dea:

  Merge remote-tracking branch 'remotes/jsnow-gitlab/tags/python-pull-request' 
into staging (2021-09-28 13:07:32 +0100)

are available in the Git repository at:

  https://repo.or.cz/qemu/ericb.git tags/pull-nbd-2021-09-27-v2

for you to fetch changes up to 1af7737871fb3b66036f5e520acb0a98fc2605f7:

  block/nbd: check that received handle is valid (2021-09-29 13:46:33 -0500)

v2: defer problematic selinux patch; sending cover letter only since
remaining patches are unchanged


nbd patches for 2021-09-27

- Vladimir Sementsov-Ogievskiy: Rework coroutines of qemu NBD client
  to improve reconnect support
- Eric Blake: Relax server in regards to NBD_OPT_LIST_META_CONTEXT
- Vladimir Sementsov-Ogievskiy: Plumb up 64-bit bulk-zeroing support
  in block layer, in preparation for future NBD spec extensions
- Nir Soffer: Default to writeback cache in qemu-nbd


Eric Blake (1):
  nbd/server: Allow LIST_META_CONTEXT without STRUCTURED_REPLY

Nir Soffer (1):
  qemu-nbd: Change default cache mode to writeback

Vladimir Sementsov-Ogievskiy (17):
  block/io: bring request check to bdrv_co_(read,write)v_vmstate
  qcow2: check request on vmstate save/load path
  block: use int64_t instead of uint64_t in driver read handlers
  block: use int64_t instead of uint64_t in driver write handlers
  block: use int64_t instead of uint64_t in copy_range driver handlers
  block: make BlockLimits::max_pwrite_zeroes 64bit
  block: use int64_t instead of int in driver write_zeroes handlers
  block/io: allow 64bit write-zeroes requests
  block: make BlockLimits::max_pdiscard 64bit
  block: use int64_t instead of int in driver discard handlers
  block/io: allow 64bit discard requests
  nbd/client-connection: nbd_co_establish_connection(): fix non set errp
  block/nbd: nbd_channel_error() shutdown channel unconditionally
  block/nbd: move nbd_recv_coroutines_wake_all() up
  block/nbd: refactor nbd_recv_coroutines_wake_all()
  block/nbd: drop connection_co
  block/nbd: check that received handle is valid

 docs/tools/qemu-nbd.rst  |   6 +-
 include/block/block_int.h|  66 +++---
 block/io.c   |  44 +++-
 block/blkdebug.c |  12 +-
 block/blklogwrites.c |  16 +-
 block/blkreplay.c|   8 +-
 block/blkverify.c|   8 +-
 block/bochs.c|   4 +-
 block/cloop.c|   4 +-
 block/commit.c   |   2 +-
 block/copy-before-write.c|  15 +-
 block/copy-on-read.c |  19 +-
 block/crypto.c   |   8 +-
 block/curl.c |   3 +-
 block/dmg.c  |   4 +-
 block/file-posix.c   |  35 ++--
 block/file-win32.c   |   8 +-
 block/filter-compress.c  |  15 +-
 block/gluster.c  |  13 +-
 block/iscsi.c|  58 +++--
 block/mirror.c   |   8 +-
 block/nbd.c  | 443 ---
 block/nfs.c  |  12 +-
 block/null.c |  18 +-
 block/nvme.c |  48 -
 block/preallocate.c  |  14 +-
 block/qcow.c |  16 +-
 block/qcow2-cluster.c|  14 +-
 block/qcow2.c|  70 +--
 block/qed.c  |   9 +-
 block/quorum.c   |  11 +-
 block/raw-format.c   |  36 ++--
 block/rbd.c  |  20 +-
 block/throttle.c |  18 +-
 block/vdi.c  |   8 +-
 block/vmdk.c |  14 +-
 block/vpc.c  |   8 +-
 block/vvfat.c|   8 +-
 nbd/client-connection.c  |   1 +
 nbd/client.c |   2 -
 nbd/server.c |   2 +-
 qemu-nbd.c   |   6 +-
 tests/unit/test-bdrv-drain.c |  16 +-
 tests/unit/test-block-iothread.c |  21 +-
 block/trace-events   |  10 +-
 45 files changed, 585 insertions(+), 596 deletions(-)

-- 
2.31.1




Re: [PULL 00/33] x86 and misc changes for 2021-09-28

2021-09-29 Thread Peter Maydell
On Tue, 28 Sept 2021 at 14:04, Paolo Bonzini  wrote:
>
> The following changes since commit 14f02d8a9ec1746823c106933a4c8f062f9e0f95:
>
>   Merge remote-tracking branch 
> 'remotes/philmd/tags/integration-testing-20210927' into staging (2021-09-27 
> 19:52:43 +0100)
>
> are available in the Git repository at:
>
>   https://gitlab.com/bonzini/qemu.git tags/for-upstream
>
> for you to fetch changes up to 824ba1e99c8bc12048636ea43dec923385ff042f:
>
>   meson_options.txt: Switch the default value for the vnc option to 'auto' 
> (2021-09-28 14:50:14 +0200)
>
> 
> * SGX implementation for x86
> * Miscellaneous bugfixes
> * Fix dependencies from ROMs to qtests
>
> 

This causes "make check" to hang on my local x86-64 machine
(consistently). Last output printed is:

Not run: 220 287
Passed all 121 iotests
make: Leaving directory '/mnt/nvmedisk/linaro/qemu-for-merges/build/all'
make: Entering directory '/mnt/nvmedisk/linaro/qemu-for-merges/build/all'
MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
QTEST_QEMU_IMG=./qemu-img
G_TEST_DBUS_DAEMON=/mnt/nvmedisk/linaro/qemu-for-merges/tests/dbus-vmstate-daemon.sh
QTEST_QEMU_BINARY=./qemu-system-i386
QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon
tests/qtest/pxe-test --tap -k
Broken pipe
ERROR qtest-i386/pxe-test - too few tests run (expected 4, got 0)
Makefile.mtest:664: recipe for target 'run-test-81' failed
make: *** [run-test-81] Error 1
make: Leaving directory '/mnt/nvmedisk/linaro/qemu-for-merges/build/all'
make: *** Waiting for unfinished jobs
make: Entering directory '/mnt/nvmedisk/linaro/qemu-for-merges/build/all'
MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
QTEST_QEMU_IMG=./qemu-img
G_TEST_DBUS_DAEMON=/mnt/nvmedisk/linaro/qemu-for-merges/tests/dbus-vmstate-daemon.sh
QTEST_QEMU_BINARY=./qemu-system-i386
QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon
tests/qtest/bios-tables-test --tap -k
Broken pipe
ERROR qtest-i386/bios-tables-test - too few tests run (expected 37, got 0)
Makefile.mtest:368: recipe for target 'run-test-44' failed
make: *** [run-test-44] Error 1
make: Leaving directory '/mnt/nvmedisk/linaro/qemu-for-merges/build/all'

and tests/qtest/migration-test, tests/qtest/vmgenid-test and
tests/qtest/cdrom-test are all still running, but hung,
for a qemu-system-i386 guest.

Backtraces for cdrom-test; both processes are at 95%+ CPU
usage, so they're livelocked I guess.
The vmgenid-test hang looks similar to this one, with the test process
in qtest_qmp_receive and the QEMU process in pci_host_config_write_common().


cdrom-test(29050)---qemu-system-i38(29059)
===
PROCESS: 29050
petmay01 29050 29048 94 18:23 pts/302:42:16 tests/qtest/cdrom-test
--tap -k -m quick
[New LWP 29052]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
0x7f56c73ee557 in g_list_last () from
/usr/lib/x86_64-linux-gnu/libglib-2.0.so.0

Thread 2 (Thread 0x7f56c6785700 (LWP 29052)):
#0  syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
#1  0x5586532a767b in qemu_futex_wait (val=,
f=) at
/mnt/nvmedisk/linaro/qemu-for-merges/include/qemu/futex.h:29
#2  qemu_event_wait (ev=ev@entry=0x5586534d4c68
) at ../../util/qemu-thread-posix.c:480
#3  0x5586532a5428 in call_rcu_thread (opaque=opaque@entry=0x0) at
../../util/rcu.c:258
#4  0x5586532a6796 in qemu_thread_start (args=) at
../../util/qemu-thread-posix.c:557
#5  0x7f56c6df06db in start_thread (arg=0x7f56c6785700) at
pthread_create.c:463
#6  0x7f56c6b1971f in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:95

Thread 1 (Thread 0x7f56c78af180 (LWP 29050)):
#0  0x7f56c73ee557 in g_list_last () from
/usr/lib/x86_64-linux-gnu/libglib-2.0.so.0
#1  0x7f56c73ee5b8 in g_list_append () from
/usr/lib/x86_64-linux-gnu/libglib-2.0.so.0
#2  0x55865328744f in qtest_qmp_receive (s=0x558654965c00) at
../../tests/qtest/libqtest.c:642
#3  0x558653287c62 in qtest_vqmp (s=,
fmt=, ap=ap@entry=0x7ffe72cb7160) at
../../tests/qtest/libqtest.c:749
#4  0x558653287f34 in qtest_qmp (s=s@entry=0x558654965c00,
fmt=fmt@entry=0x5586532b523d "{ 'execute': 'query-status' }") at
../../tests/qtest/libqtest.c:790
#5  0x5586532862db in boot_sector_test (qts=0x558654965c00) at
../../tests/qtest/boot-sector.c:161
#6  0x558653285fa4 in test_cdboot (data=) at
../../tests/qtest/cdrom-test.c:125
#7  0x7f56c741905a in ?? () from /usr/lib/x86_64-linux-gnu/libglib-2.0.so.0
#8  0x7f56c7418f8b in ?? () from /usr/lib/x86_64-linux-gnu/libglib-2.0.so.0
#9  0x7f56c7418f8b in ?? () from /usr/lib/x86_64-linux-gnu/libglib-2.0.so.0
#10 0x7f56c7418f8b in ?? () from /usr/lib/x86_64-linux-gnu/libglib-2.0.so.0
#11 0x7f56c7419232 in g_test_run_suite () from
/usr/lib/x86_64-linux-gnu

Re: [PATCH v3 2/3] QIOChannelSocket: Implement io_async_write & io_async_flush

2021-09-29 Thread Peter Xu
On Wed, Sep 29, 2021 at 04:36:10PM -0300, Leonardo Bras Soares Passos wrote:
> On Tue, Sep 28, 2021 at 7:45 PM Peter Xu  wrote:
> >
> > On Wed, Sep 22, 2021 at 07:24:22PM -0300, Leonardo Bras wrote:
> > > +static void qio_channel_socket_async_flush(QIOChannel *ioc,
> > > +   Error **errp)
> > > +{
> > > +QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
> > > +struct msghdr msg = {};
> > > +struct pollfd pfd;
> > > +struct sock_extended_err *serr;
> > > +struct cmsghdr *cm;
> > > +char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
> > > +int ret;
> > > +
> > > +memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
> > > +msg.msg_control = control;
> > > +msg.msg_controllen = sizeof(control);
> > > +
> > > +while (sioc->async_sent < sioc->async_queued) {
> > > +ret = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE);
> > > +if (ret < 0) {
> > > +if (errno == EAGAIN) {
> > > +/* Nothing on errqueue, wait */
> > > +pfd.fd = sioc->fd;
> > > +pfd.events = 0;
> > > +ret = poll(&pfd, 1, 250);
> > > +if (ret == 0) {
> > > +/*
> > > + * Timeout : After 250ms without receiving any 
> > > zerocopy
> > > + * notification, consider all data as sent.
> > > + */
> > > +break;
> >
> > After a timeout, we'll break the while loop and continue parsing an invalid
> > msg [1].  Is that what we want?
> 
> No, the point here was returning from flush if this (long) timeout
> happened, as in
> "if so long has passed, there must be no pending send", which I
> agree is quite bad,
> but it was all I could think to avoid an infinite loop here if
> something goes wrong.

IMHO it's the same when we write() to a socket but the buffer is always full,
we'll simply block there until it has some space.  I don't know what we can do
here besides infinite loop on the timeout - we shouldn't eat all the CPU, but
we should still wait?

-- 
Peter Xu




Re: [PATCH v3 0/3] QIOChannel async_write & async_flush + MSG_ZEROCOPY + multifd

2021-09-29 Thread Leonardo Bras Soares Passos
On Wed, Sep 29, 2021 at 4:23 PM Peter Xu  wrote:
>
> On Wed, Sep 29, 2021 at 03:34:01PM -0300, Leonardo Bras Soares Passos wrote:
> > You mean like I did in pre-V1, enabling ZEROCOPY for multifd in QMP?
> > Or is this something else?
>
> I mean any way to still be able to turn zerocopy off? E.g. when the user has 
> no
> privilege on mlock.  Thanks,
>
> --
> Peter Xu
>

Yeah, that makes sense in the new approach of failing when zerocopy is
not possible.
I will make sure to do that in v3.

Best regards,
Leonardo




Re: [PATCH v3 3/3] multifd: Send using asynchronous write on nocomp to send RAM pages.

2021-09-29 Thread Leonardo Bras Soares Passos
On Tue, Sep 28, 2021 at 7:49 PM Peter Xu  wrote:
>
> On Fri, Sep 24, 2021 at 06:43:49PM +0100, Daniel P. Berrangé wrote:
> > > @@ -546,6 +546,7 @@ void multifd_save_cleanup(void)
> > >  MultiFDSendParams *p = &multifd_send_state->params[i];
> > >  Error *local_err = NULL;
> > >
> > > +qio_channel_async_flush(p->c, NULL);
> > >  socket_send_channel_destroy(p->c);
> > >  p->c = NULL;
> > >  qemu_mutex_destroy(&p->mutex);
> >
> > This isn't reliable because qio_channel_async_flush will return early
> > even if not everything is flushed.
>
> Right, though I think the problem is in patch 2 where we should make sure
> flush() will only return if all data sent.
>
> And at the meantime we may also want to move it to before we send the devices
> states for both pre/post copy.  multifd_save_cleanup() is called only once
> migration has completed and we're cleaning things up; I'm afraid it's too late, so
> potentially the device states can arrive and dest vm running without the 
> latest
> src VM memories.

Thanks for that Peter!
I had some trouble sorting out when to flush, and I think I still missed this.

I will have that improved for v3.

>
> --
> Peter Xu
>

Best regards,
Leonardo




[PATCH v3 09/13] qapi/parser: add import cycle workaround

2021-09-29 Thread John Snow
There is a cycle that exists in the QAPI generator: [schema -> expr ->
parser -> schema]. It exists because the QAPIDoc class needs the names
of types defined by the schema module, but the schema module needs to
import both expr.py/parser.py to do its actual parsing.

Ultimately, the layering violation is that parser.py should not have any
knowledge of specifics of the Schema. QAPIDoc performs double-duty here
both as a parser *and* as a finalized object that is part of the schema.

I see three paths here:

(1) Just use the TYPE_CHECKING trick to eliminate the cycle which is only
present during static analysis.

(2) Don't bother to annotate connect_member() et al, give them 'object'
or 'Any'. I don't particularly like this, because it diminishes the
usefulness of type hints for documentation purposes. Still, it's an
extremely quick fix.

(3) Reimplement doc <--> definition correlation directly in schema.py,
integrating doc fields directly into QAPISchemaMember and relieving
the QAPIDoc class of the responsibility. Users of the information
would instead visit the members first and retrieve their
documentation instead of the inverse operation -- visiting the
documentation and retrieving their members.

I prefer (3), but (1) is the easiest way to have my cake (strong type
hints) and eat it too (Not have import cycles). Do (1) for now, but plan
for (3). See also:
https://mypy.readthedocs.io/en/latest/runtime_troubles.html#import-cycles

Signed-off-by: John Snow 
---
 scripts/qapi/parser.py | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
index 123fc2f099c..30b1d98df0b 100644
--- a/scripts/qapi/parser.py
+++ b/scripts/qapi/parser.py
@@ -18,6 +18,7 @@
 import os
 import re
 from typing import (
+TYPE_CHECKING,
 Dict,
 List,
 Optional,
@@ -30,6 +31,12 @@
 from .source import QAPISourceInfo
 
 
+if TYPE_CHECKING:
+# pylint: disable=cyclic-import
+# TODO: Remove cycle. [schema -> expr -> parser -> schema]
+from .schema import QAPISchemaFeature, QAPISchemaMember
+
+
 # Return value alias for get_expr().
 _ExprValue = Union[List[object], Dict[str, object], str, bool]
 
@@ -473,9 +480,9 @@ def append(self, line):
 class ArgSection(Section):
 def __init__(self, parser, name, indent=0):
 super().__init__(parser, name, indent)
-self.member = None
+self.member: Optional['QAPISchemaMember'] = None
 
-def connect(self, member):
+def connect(self, member: 'QAPISchemaMember') -> None:
 self.member = member
 
 class NullSection(Section):
@@ -750,14 +757,14 @@ def _append_freeform(self, line):
  % match.group(1))
 self._section.append(line)
 
-def connect_member(self, member):
+def connect_member(self, member: 'QAPISchemaMember') -> None:
 if member.name not in self.args:
 # Undocumented TODO outlaw
 self.args[member.name] = QAPIDoc.ArgSection(self._parser,
 member.name)
 self.args[member.name].connect(member)
 
-def connect_feature(self, feature):
+def connect_feature(self, feature: 'QAPISchemaFeature') -> None:
 if feature.name not in self.features:
 raise QAPISemError(feature.info,
"feature '%s' lacks documentation"
-- 
2.31.1




[PATCH v3 11/13] qapi/parser: enable mypy checks

2021-09-29 Thread John Snow
Signed-off-by: John Snow 

---

As always, this can be merged with the previous commit.

Signed-off-by: John Snow 
---
 scripts/qapi/mypy.ini | 5 -
 1 file changed, 5 deletions(-)

diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini
index 54ca4483d6d..66253564297 100644
--- a/scripts/qapi/mypy.ini
+++ b/scripts/qapi/mypy.ini
@@ -3,11 +3,6 @@ strict = True
 disallow_untyped_calls = False
 python_version = 3.6
 
-[mypy-qapi.parser]
-disallow_untyped_defs = False
-disallow_incomplete_defs = False
-check_untyped_defs = False
-
 [mypy-qapi.schema]
 disallow_untyped_defs = False
 disallow_incomplete_defs = False
-- 
2.31.1




[PATCH v3 13/13] qapi/parser: enable pylint checks

2021-09-29 Thread John Snow
Signed-off-by: John Snow 

---

This can be merged with the previous commit, if desired.

Signed-off-by: John Snow 
---
 scripts/qapi/pylintrc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/qapi/pylintrc b/scripts/qapi/pylintrc
index 5b7dbc58ad8..b259531a726 100644
--- a/scripts/qapi/pylintrc
+++ b/scripts/qapi/pylintrc
@@ -2,8 +2,7 @@
 
 # Add files or directories matching the regex patterns to the ignore list.
 # The regex matches against base names, not paths.
-ignore-patterns=parser.py,
-schema.py,
+ignore-patterns=schema.py,
 
 
 [MESSAGES CONTROL]
-- 
2.31.1




[PATCH v3 05/13] qapi/parser: improve detection of '@symbol:' preface

2021-09-29 Thread John Snow
Leading and trailing whitespace are now discarded, addressing the FIXME
comment. A new error is raised to detect this accidental case.

Parsing for args sections is left alone here; the 'name' variable is
moved into the only block where it is used.

Signed-off-by: John Snow 

---

Tangentially related to delinting in that removing 'FIXME' comments is a
goal for pylint. My goal is to allow 'TODO' to be checked in, but
'FIXME' should be fixed prior to inclusion.

Arbitrary, but that's life for you.

Signed-off-by: John Snow 
---
 scripts/qapi/parser.py  | 13 -
 tests/qapi-schema/doc-whitespace-leading-symbol.err |  1 +
 .../qapi-schema/doc-whitespace-leading-symbol.json  |  6 ++
 tests/qapi-schema/doc-whitespace-leading-symbol.out |  0
 .../qapi-schema/doc-whitespace-trailing-symbol.err  |  1 +
 .../qapi-schema/doc-whitespace-trailing-symbol.json |  6 ++
 .../qapi-schema/doc-whitespace-trailing-symbol.out  |  0
 tests/qapi-schema/meson.build   |  2 ++
 8 files changed, 24 insertions(+), 5 deletions(-)
 create mode 100644 tests/qapi-schema/doc-whitespace-leading-symbol.err
 create mode 100644 tests/qapi-schema/doc-whitespace-leading-symbol.json
 create mode 100644 tests/qapi-schema/doc-whitespace-leading-symbol.out
 create mode 100644 tests/qapi-schema/doc-whitespace-trailing-symbol.err
 create mode 100644 tests/qapi-schema/doc-whitespace-trailing-symbol.json
 create mode 100644 tests/qapi-schema/doc-whitespace-trailing-symbol.out

diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
index bfd2dbfd9a2..2f93a752f66 100644
--- a/scripts/qapi/parser.py
+++ b/scripts/qapi/parser.py
@@ -549,18 +549,21 @@ def _append_body_line(self, line):
 
 Else, append the line to the current section.
 """
-name = line.split(' ', 1)[0]
-# FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
-# recognized, and get silently treated as ordinary text
-if not self.symbol and not self.body.text and line.startswith('@'):
-if not line.endswith(':'):
+stripped = line.strip()
+
+if not self.symbol and not self.body.text and stripped.startswith('@'):
+if not stripped.endswith(':'):
 raise QAPIParseError(self._parser, "line should end with ':'")
+if not stripped == line:
+raise QAPIParseError(
+self._parser, "extra whitespace around symbol declaration")
 self.symbol = line[1:-1]
 # FIXME invalid names other than the empty string aren't flagged
 if not self.symbol:
 raise QAPIParseError(self._parser, "invalid name")
 elif self.symbol:
 # This is a definition documentation block
+name = line.split(' ', 1)[0]
 if name.startswith('@') and name.endswith(':'):
 self._append_line = self._append_args_line
 self._append_args_line(line)
diff --git a/tests/qapi-schema/doc-whitespace-leading-symbol.err 
b/tests/qapi-schema/doc-whitespace-leading-symbol.err
new file mode 100644
index 000..785468b90e2
--- /dev/null
+++ b/tests/qapi-schema/doc-whitespace-leading-symbol.err
@@ -0,0 +1 @@
+doc-whitespace-leading-symbol.json:4:1: extra whitespace around symbol 
declaration
diff --git a/tests/qapi-schema/doc-whitespace-leading-symbol.json 
b/tests/qapi-schema/doc-whitespace-leading-symbol.json
new file mode 100644
index 000..128c781bec9
--- /dev/null
+++ b/tests/qapi-schema/doc-whitespace-leading-symbol.json
@@ -0,0 +1,6 @@
+# Documentation for expression has leading whitespace
+
+##
+#  @leading-whitespace:
+##
+{ 'command': 'leading-whitespace', 'data': {'a': 'int'} }
diff --git a/tests/qapi-schema/doc-whitespace-leading-symbol.out 
b/tests/qapi-schema/doc-whitespace-leading-symbol.out
new file mode 100644
index 000..e69de29bb2d
diff --git a/tests/qapi-schema/doc-whitespace-trailing-symbol.err 
b/tests/qapi-schema/doc-whitespace-trailing-symbol.err
new file mode 100644
index 000..fe583b38008
--- /dev/null
+++ b/tests/qapi-schema/doc-whitespace-trailing-symbol.err
@@ -0,0 +1 @@
+doc-whitespace-trailing-symbol.json:4:1: extra whitespace around symbol 
declaration
diff --git a/tests/qapi-schema/doc-whitespace-trailing-symbol.json 
b/tests/qapi-schema/doc-whitespace-trailing-symbol.json
new file mode 100644
index 000..da706c3d176
--- /dev/null
+++ b/tests/qapi-schema/doc-whitespace-trailing-symbol.json
@@ -0,0 +1,6 @@
+# Documentation for expression has extra whitespace
+
+##
+# @trailing-whitespace:  
+##
+{ 'command': 'trailing-whitespace', 'data': {'a': 'int'} }
diff --git a/tests/qapi-schema/doc-whitespace-trailing-symbol.out 
b/tests/qapi-schema/doc-whitespace-trailing-symbol.out
new file mode 100644
index 000..e69de29bb2d
diff --git a/tests/qapi-schema/meson.build b/tests/qapi-schema/meson.build
index 6187efbd58f..64ffbd1b3d4 100644
--- a/tests/qapi-s

[PATCH v3 08/13] qapi/parser: Introduce NullSection

2021-09-29 Thread John Snow
Here's the weird bit. QAPIDoc generally expects -- virtually everywhere
-- that it will always have a current section. The sole exception to
this is in the case that end_comment() is called, which leaves us with
*no* section. However, in this case, we also don't expect to ever
mutate the comment contents again.

NullSection is just a Null-object that allows us to maintain the
invariant that we *always* have a current section, enforced by static
typing -- allowing us to type that field as QAPIDoc.Section instead of
the more ambiguous Optional[QAPIDoc.Section].

end_section is renamed to switch_section and now accepts as an argument
the new section to activate, clarifying that no callers ever just
unilaterally end a section; they only do so when starting a new section.

Signed-off-by: John Snow 

---

For my money: Optional types can be a nuisance because an unfamiliar
reader may wonder in what circumstances the field may be unset. This
makes the condition quite a bit more explicit and statically provable.

Doing it in this way (and not by creating a dummy section) will also
continue to reject (rather noisily) any erroneous attempts to append
additional lines after end_comment() has been called.

Also, this section isn't indexed into .sections[] and isn't really
visible in any way to external users of the class, so it seems like a
harmless and low-cost way to formalize the "life cycle" of a QAPIDoc
parser.

Clean and clear as I can make it, in as few lines as I could muster.

Signed-off-by: John Snow 
---
 scripts/qapi/parser.py | 27 ---
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
index 1fdc5bc7056..123fc2f099c 100644
--- a/scripts/qapi/parser.py
+++ b/scripts/qapi/parser.py
@@ -478,6 +478,13 @@ def __init__(self, parser, name, indent=0):
 def connect(self, member):
 self.member = member
 
+class NullSection(Section):
+"""
+Empty section that signifies the end of a doc block.
+"""
+def append(self, line):
+assert False, "BUG: Text appended after end_comment() called."
+
 def __init__(self, parser, info):
 # self._parser is used to report errors with QAPIParseError.  The
 # resulting error position depends on the state of the parser.
@@ -525,7 +532,7 @@ def append(self, line):
 self._append_line(line)
 
 def end_comment(self):
-self._end_section()
+self._switch_section(QAPIDoc.NullSection(self._parser))
 
 @staticmethod
 def _is_section_tag(name):
@@ -702,9 +709,9 @@ def _start_symbol_section(self, symbols_dict, name, indent):
 raise QAPIParseError(self._parser,
  "'%s' parameter name duplicated" % name)
 assert not self.sections
-self._end_section()
-self._section = QAPIDoc.ArgSection(self._parser, name, indent)
-symbols_dict[name] = self._section
+new_section = QAPIDoc.ArgSection(self._parser, name, indent)
+self._switch_section(new_section)
+symbols_dict[name] = new_section
 
 def _start_args_section(self, name, indent):
 self._start_symbol_section(self.args, name, indent)
@@ -716,13 +723,11 @@ def _start_section(self, name=None, indent=0):
 if name in ('Returns', 'Since') and self.has_section(name):
 raise QAPIParseError(self._parser,
  "duplicated '%s' section" % name)
-self._end_section()
-self._section = QAPIDoc.Section(self._parser, name, indent)
-self.sections.append(self._section)
-
-def _end_section(self):
-assert self._section is not None
+new_section = QAPIDoc.Section(self._parser, name, indent)
+self._switch_section(new_section)
+self.sections.append(new_section)
 
+def _switch_section(self, new_section):
 text = self._section.text = self._section.text.strip()
 
 # Only the 'body' section is allowed to have an empty body.
@@ -735,7 +740,7 @@ def _end_section(self):
 self._parser,
 "empty doc section '%s'" % self._section.name)
 
-self._section = None
+self._section = new_section
 
 def _append_freeform(self, line):
 match = re.match(r'(@\S+:)', line)
-- 
2.31.1




[PATCH v3 12/13] qapi/parser: Silence too-few-public-methods warning

2021-09-29 Thread John Snow
Eh. Not worth the fuss today. There are bigger fish to fry.

Signed-off-by: John Snow 
---
 scripts/qapi/parser.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
index 8a846079207..7511eedaa35 100644
--- a/scripts/qapi/parser.py
+++ b/scripts/qapi/parser.py
@@ -457,8 +457,10 @@ class QAPIDoc:
 """
 
 class Section:
+# pylint: disable=too-few-public-methods
 def __init__(self, parser: QAPISchemaParser,
  name: Optional[str] = None, indent: int = 0):
+
 # parser, for error messages about indentation
 self._parser = parser
 # optional section name (argument/member or section name)
@@ -494,6 +496,7 @@ class NullSection(Section):
 """
 Empty section that signifies the end of a doc block.
 """
+# pylint: disable=too-few-public-methods
 def append(self, line: str) -> None:
 assert False, "BUG: Text appended after end_comment() called."
 
-- 
2.31.1




[PATCH v3 07/13] qapi/parser: Simplify _end_section()

2021-09-29 Thread John Snow
The "if self._section" clause in end_section is mysterious: In which
circumstances might we end a section when we don't have one?

QAPIDoc always expects there to be a "current section"; the only
exception is after a call to end_comment(). This actually *shouldn't*
ever be 'None',
so let's remove that logic so I don't wonder why it's like this again in
three months.

Signed-off-by: John Snow 
---
 scripts/qapi/parser.py | 22 +++---
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
index 52748e8e462..1fdc5bc7056 100644
--- a/scripts/qapi/parser.py
+++ b/scripts/qapi/parser.py
@@ -721,13 +721,21 @@ def _start_section(self, name=None, indent=0):
 self.sections.append(self._section)
 
 def _end_section(self):
-if self._section:
-text = self._section.text = self._section.text.strip()
-if self._section.name and (not text or text.isspace()):
-raise QAPIParseError(
-self._parser,
-"empty doc section '%s'" % self._section.name)
-self._section = None
+assert self._section is not None
+
+text = self._section.text = self._section.text.strip()
+
+# Only the 'body' section is allowed to have an empty body.
+# All other sections, including anonymous ones, must have text.
+if self._section != self.body and not text:
+# We do not create anonymous sections unless there is
+# something to put in them; this is a parser bug.
+assert self._section.name
+raise QAPIParseError(
+self._parser,
+"empty doc section '%s'" % self._section.name)
+
+self._section = None
 
 def _append_freeform(self, line):
 match = re.match(r'(@\S+:)', line)
-- 
2.31.1




[PATCH v3 04/13] qapi: Add spaces after symbol declaration for consistency

2021-09-29 Thread John Snow
Several QGA definitions omit a blank line after the symbol
declaration. This works OK currently, but it's the only place where we
do this. Adjust it for consistency.

Future commits may wind up enforcing this formatting.

Signed-off-by: John Snow 

---

This isn't strictly necessary and I don't actually get around to
enforcing it in this series, but I figured I'd share it with the list
anyway. We can just drop this patch but I wanted to see your thoughts.

Signed-off-by: John Snow 
---
 qapi/block-core.json| 1 +
 qga/qapi-schema.json| 3 +++
 tests/qapi-schema/doc-good.json | 8 
 3 files changed, 12 insertions(+)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 4114f8b6fc3..52a6dae9522 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3132,6 +3132,7 @@
 
 ##
 # @BlockdevQcow2EncryptionFormat:
+#
 # @aes: AES-CBC with plain64 initialization vectors
 #
 # Since: 2.10
diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index c60f5e669d7..94e4aacdcc6 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -1140,6 +1140,7 @@
 
 ##
 # @GuestExec:
+#
 # @pid: pid of child process in guest OS
 #
 # Since: 2.5
@@ -1171,6 +1172,7 @@
 
 ##
 # @GuestHostName:
+#
 # @host-name: Fully qualified domain name of the guest OS
 #
 # Since: 2.10
@@ -1197,6 +1199,7 @@
 
 ##
 # @GuestUser:
+#
 # @user: Username
 # @domain: Logon domain (windows only)
 # @login-time: Time of login of this user on the computer. If multiple
diff --git a/tests/qapi-schema/doc-good.json b/tests/qapi-schema/doc-good.json
index a20acffd8b9..86dc25d2bd8 100644
--- a/tests/qapi-schema/doc-good.json
+++ b/tests/qapi-schema/doc-good.json
@@ -53,6 +53,7 @@
 
 ##
 # @Enum:
+#
 # @one: The _one_ {and only}
 #
 # Features:
@@ -67,6 +68,7 @@
 
 ##
 # @Base:
+#
 # @base1:
 # the first member
 ##
@@ -75,6 +77,7 @@
 
 ##
 # @Variant1:
+#
 # A paragraph
 #
 # Another paragraph (but no @var: line)
@@ -91,11 +94,13 @@
 
 ##
 # @Variant2:
+#
 ##
 { 'struct': 'Variant2', 'data': {} }
 
 ##
 # @Object:
+#
 # Features:
 # @union-feat1: a feature
 ##
@@ -109,6 +114,7 @@
 
 ##
 # @Alternate:
+#
 # @i: an integer
 # @b is undocumented
 #
@@ -126,6 +132,7 @@
 
 ##
 # @cmd:
+#
 # @arg1: the first argument
 #
 # @arg2: the second
@@ -175,6 +182,7 @@
 
 ##
 # @EVT_BOXED:
+#
 # Features:
 # @feat3: a feature
 ##
-- 
2.31.1




[PATCH v3 10/13] qapi/parser: add type hint annotations (QAPIDoc)

2021-09-29 Thread John Snow
Annotations do not change runtime behavior.
This commit consists of only annotations.

Signed-off-by: John Snow 
---
 scripts/qapi/parser.py | 67 --
 1 file changed, 39 insertions(+), 28 deletions(-)

diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
index 30b1d98df0b..8a846079207 100644
--- a/scripts/qapi/parser.py
+++ b/scripts/qapi/parser.py
@@ -37,6 +37,9 @@
 from .schema import QAPISchemaFeature, QAPISchemaMember
 
 
+#: Represents a single Top Level QAPI schema expression.
+TopLevelExpr = Dict[str, object]
+
 # Return value alias for get_expr().
 _ExprValue = Union[List[object], Dict[str, object], str, bool]
 
@@ -454,7 +457,8 @@ class QAPIDoc:
 """
 
 class Section:
-def __init__(self, parser, name=None, indent=0):
+def __init__(self, parser: QAPISchemaParser,
+ name: Optional[str] = None, indent: int = 0):
 # parser, for error messages about indentation
 self._parser = parser
 # optional section name (argument/member or section name)
@@ -463,7 +467,7 @@ def __init__(self, parser, name=None, indent=0):
 # the expected indent level of the text of this section
 self._indent = indent
 
-def append(self, line):
+def append(self, line: str) -> None:
 # Strip leading spaces corresponding to the expected indent level
 # Blank lines are always OK.
 if line:
@@ -478,7 +482,8 @@ def append(self, line):
 self.text += line.rstrip() + '\n'
 
 class ArgSection(Section):
-def __init__(self, parser, name, indent=0):
+def __init__(self, parser: QAPISchemaParser,
+ name: str, indent: int = 0):
 super().__init__(parser, name, indent)
 self.member: Optional['QAPISchemaMember'] = None
 
@@ -489,35 +494,34 @@ class NullSection(Section):
 """
 Empty section that signifies the end of a doc block.
 """
-def append(self, line):
+def append(self, line: str) -> None:
 assert False, "BUG: Text appended after end_comment() called."
 
-def __init__(self, parser, info):
+def __init__(self, parser: QAPISchemaParser, info: QAPISourceInfo):
 # self._parser is used to report errors with QAPIParseError.  The
 # resulting error position depends on the state of the parser.
 # It happens to be the beginning of the comment.  More or less
 # servicable, but action at a distance.
 self._parser = parser
 self.info = info
-self.symbol = None
+self.symbol: Optional[str] = None
 self.body = QAPIDoc.Section(parser)
-# dict mapping parameter name to ArgSection
-self.args = OrderedDict()
-self.features = OrderedDict()
-# a list of Section
-self.sections = []
+# dicts mapping parameter/feature names to their ArgSection
+self.args: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
+self.features: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
+self.sections: List[QAPIDoc.Section] = []
 # the current section
 self._section = self.body
 self._append_line = self._append_body_line
 
-def has_section(self, name):
+def has_section(self, name: str) -> bool:
 """Return True if we have a section with this name."""
 for i in self.sections:
 if i.name == name:
 return True
 return False
 
-def append(self, line):
+def append(self, line: str) -> None:
 """
 Parse a comment line and add it to the documentation.
 
@@ -538,18 +542,18 @@ def append(self, line):
 line = line[1:]
 self._append_line(line)
 
-def end_comment(self):
+def end_comment(self) -> None:
 self._switch_section(QAPIDoc.NullSection(self._parser))
 
 @staticmethod
-def _is_section_tag(name):
+def _is_section_tag(name: str) -> bool:
 return name in ('Returns:', 'Since:',
 # those are often singular or plural
 'Note:', 'Notes:',
 'Example:', 'Examples:',
 'TODO:')
 
-def _append_body_line(self, line):
+def _append_body_line(self, line: str) -> None:
 """
 Process a line of documentation text in the body section.
 
@@ -594,7 +598,7 @@ def _append_body_line(self, line):
 # This is a free-form documentation block
 self._append_freeform(line)
 
-def _append_args_line(self, line):
+def _append_args_line(self, line: str) -> None:
 """
 Process a line of documentation text in an argument section.
 
@@ -640,7 +644,7 @@ def _append_args_line(self, line):
 
 self._append_freeform(line)
 
-def _append_features_line(self, line):
+def _append_features_line(self, line: str) -> None:
 name = l

  1   2   3   >