date:20190731

From: Philippe Mathieu-Daudé 

The XLNX_ZYNQMP config is used in multiple subdirectories
(timer, intc). Move it to the root hw/Kconfig.

Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20190427141459.19728-2-phi...@redhat.com>
Reviewed-by: Alistair Francis 
Signed-off-by: Thomas Huth 
---
 hw/Kconfig   | 3 +++
 hw/timer/Kconfig | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/Kconfig b/hw/Kconfig
index 195f541e50..dbae1c0852 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -73,3 +73,6 @@ config XILINX
 config XILINX_AXI
 bool
 select PTIMER # for hw/dma/xilinx_axidma.c
+
+config XLNX_ZYNQMP
+bool
diff --git a/hw/timer/Kconfig b/hw/timer/Kconfig
index 51921eb63f..eefc95f35e 100644
--- a/hw/timer/Kconfig
+++ b/hw/timer/Kconfig
@@ -34,9 +34,6 @@ config TWL92230
 bool
 depends on I2C
 
-config XLNX_ZYNQMP
-bool
-
 config ALTERA_TIMER
 bool
 select PTIMER
-- 
2.21.0

[Qemu-devel] [PATCH for-4.2 v2 0/8] Kconfig switches

Here are some more Kconfig patches that clean up the switches of
existing devices and introduce proper config switches for some
other devices that were always enabled before.

v2:
 - Included Philippe's patches to avoid a conflict with XLNX_ZYNQMP
 - Don't rely on indirect dependencies, always "select XYZ" if it is
   adequate
 - Added patch for the generic loader device

Philippe Mathieu-Daudé (3):
  hw/Kconfig: Move the generic XLNX_ZYNQMP to the root hw/Kconfig
  hw/intc: Only build the xlnx-iomod-intc device for the MicroBlaze PMU
  hw/dma: Do not build the xlnx_dpdma device for the MicroBlaze machines

Thomas Huth (5):
  hw/core: Add a config switch for the "register" device
  hw/core: Add a config switch for the "or-irq" device
  hw/core: Add a config switch for the "split-irq" device
  hw/misc: Add a config switch for the "unimplemented" device
  hw/core: Add a config switch for the generic loader device

 hw/Kconfig|  4 
 hw/arm/Kconfig| 15 +++
 hw/core/Kconfig   | 13 +
 hw/core/Makefile.objs |  8 
 hw/dma/Kconfig|  1 +
 hw/dma/Makefile.objs  |  1 -
 hw/intc/Makefile.objs |  2 +-
 hw/microblaze/Kconfig |  1 +
 hw/misc/Kconfig   |  3 +++
 hw/misc/Makefile.objs |  2 +-
 hw/pci-host/Kconfig   |  3 ++-
 hw/sparc64/Kconfig|  1 +
 hw/timer/Kconfig  |  3 ---
 13 files changed, 46 insertions(+), 11 deletions(-)

-- 
2.21.0

[Qemu-devel] [PATCH v2 5/8] hw/core: Add a config switch for the "or-irq" device

The "or-irq" device is only used by certain machines. Let's add
a proper config switch for it so that it only gets compiled when we
really need it.

Signed-off-by: Thomas Huth 
---
 hw/arm/Kconfig| 3 +++
 hw/core/Kconfig   | 3 +++
 hw/core/Makefile.objs | 2 +-
 hw/pci-host/Kconfig   | 3 ++-
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index ab65ecd216..849195c3f6 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -292,6 +292,7 @@ config RASPI
 config STM32F205_SOC
 bool
 select ARM_V7M
+select OR_IRQ
 select STM32F2XX_TIMER
 select STM32F2XX_USART
 select STM32F2XX_SYSCFG
@@ -360,6 +361,7 @@ config MPS2
 select LAN9118
 select MPS2_FPGAIO
 select MPS2_SCC
+select OR_IRQ
 select PL022# Serial port
 select PL080# DMA controller
 
@@ -439,6 +441,7 @@ config ARMSSE
 select IOTKIT_SECCTL
 select IOTKIT_SYSCTL
 select IOTKIT_SYSINFO
+select OR_IRQ
 select TZ_MPC
 select TZ_MSC
 select TZ_PPC
diff --git a/hw/core/Kconfig b/hw/core/Kconfig
index d11920fcb3..984143456a 100644
--- a/hw/core/Kconfig
+++ b/hw/core/Kconfig
@@ -7,6 +7,9 @@ config PTIMER
 config FITLOADER
 bool
 
+config OR_IRQ
+bool
+
 config PLATFORM_BUS
 bool
 
diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
index d6cfb2a81b..ce337bd7c9 100644
--- a/hw/core/Makefile.objs
+++ b/hw/core/Makefile.objs
@@ -18,7 +18,7 @@ common-obj-$(CONFIG_SOFTMMU) += loader.o
 common-obj-$(CONFIG_FITLOADER) += loader-fit.o
 common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o
 common-obj-$(CONFIG_REGISTER) += register.o
-common-obj-$(CONFIG_SOFTMMU) += or-irq.o
+common-obj-$(CONFIG_OR_IRQ) += or-irq.o
 common-obj-$(CONFIG_SOFTMMU) += split-irq.o
 common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o
 common-obj-$(CONFIG_SOFTMMU) += generic-loader.o
diff --git a/hw/pci-host/Kconfig b/hw/pci-host/Kconfig
index 8c16d96b3f..1edc1a31d4 100644
--- a/hw/pci-host/Kconfig
+++ b/hw/pci-host/Kconfig
@@ -2,8 +2,9 @@ config PAM
 bool
 
 config PREP_PCI
-select PCI
 bool
+select PCI
+select OR_IRQ
 
 config GRACKLE_PCI
 select PCI
-- 
2.21.0

[Qemu-devel] [PATCH v2 6/8] hw/core: Add a config switch for the "split-irq" device

The "split-irq" device is currently only used by machines that use
CONFIG_ARMSSE. Let's add a proper CONFIG_SPLIT_IRQ switch for this
so that it only gets compiled when we really need it.

Signed-off-by: Thomas Huth 
---
 hw/arm/Kconfig| 3 +++
 hw/core/Kconfig   | 3 +++
 hw/core/Makefile.objs | 2 +-
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 849195c3f6..6e24c73b54 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -81,6 +81,7 @@ config MUSCA
 select ARMSSE
 select PL011
 select PL031
+select SPLIT_IRQ
 
 config MUSICPAL
 bool
@@ -364,6 +365,7 @@ config MPS2
 select OR_IRQ
 select PL022# Serial port
 select PL080# DMA controller
+select SPLIT_IRQ
 
 config FSL_IMX7
 bool
@@ -442,6 +444,7 @@ config ARMSSE
 select IOTKIT_SYSCTL
 select IOTKIT_SYSINFO
 select OR_IRQ
+select SPLIT_IRQ
 select TZ_MPC
 select TZ_MSC
 select TZ_PPC
diff --git a/hw/core/Kconfig b/hw/core/Kconfig
index 984143456a..fffb3d62b2 100644
--- a/hw/core/Kconfig
+++ b/hw/core/Kconfig
@@ -15,3 +15,6 @@ config PLATFORM_BUS
 
 config REGISTER
 bool
+
+config SPLIT_IRQ
+bool
diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
index ce337bd7c9..bb1afe422a 100644
--- a/hw/core/Makefile.objs
+++ b/hw/core/Makefile.objs
@@ -19,7 +19,7 @@ common-obj-$(CONFIG_FITLOADER) += loader-fit.o
 common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o
 common-obj-$(CONFIG_REGISTER) += register.o
 common-obj-$(CONFIG_OR_IRQ) += or-irq.o
-common-obj-$(CONFIG_SOFTMMU) += split-irq.o
+common-obj-$(CONFIG_SPLIT_IRQ) += split-irq.o
 common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o
 common-obj-$(CONFIG_SOFTMMU) += generic-loader.o
 common-obj-$(CONFIG_SOFTMMU) += null-machine.o
-- 
2.21.0

[Qemu-devel] [PATCH v2 2/8] hw/intc: Only build the xlnx-iomod-intc device for the MicroBlaze PMU

From: Philippe Mathieu-Daudé 

The Xilinx I/O Module Interrupt Controller is only used by the
MicroBlaze PMU, not by the AArch64 machine.
Move it from the generic ZynqMP object list to the PMU specific.

Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20190427141459.19728-3-phi...@redhat.com>
Reviewed-by: Thomas Huth 
Reviewed-by: Alistair Francis 
Signed-off-by: Thomas Huth 
---
 hw/intc/Makefile.objs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 03019b9a03..f726d87532 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -3,7 +3,7 @@ common-obj-$(CONFIG_I8259) += i8259_common.o i8259.o
 common-obj-$(CONFIG_PL190) += pl190.o
 common-obj-$(CONFIG_PUV3) += puv3_intc.o
 common-obj-$(CONFIG_XILINX) += xilinx_intc.o
-common-obj-$(CONFIG_XLNX_ZYNQMP) += xlnx-pmu-iomod-intc.o
+common-obj-$(CONFIG_XLNX_ZYNQMP_PMU) += xlnx-pmu-iomod-intc.o
 common-obj-$(CONFIG_XLNX_ZYNQMP) += xlnx-zynqmp-ipi.o
 common-obj-$(CONFIG_ETRAXFS) += etraxfs_pic.o
 common-obj-$(CONFIG_IMX) += imx_avic.o imx_gpcv2.o
-- 
2.21.0

[Qemu-devel] [PATCH v2 3/8] hw/dma: Do not build the xlnx_dpdma device for the MicroBlaze machines

From: Philippe Mathieu-Daudé 

The xlnx_dpdma device is only used by the ZynqMP AArch64 machine
(not the MicroBlaze PMU). Remove it from the ZynqMP generic objects.
(Note, this entry was duplicated for the AArch64).

Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20190427141459.19728-4-phi...@redhat.com>
Reviewed-by: Thomas Huth 
Reviewed-by: Alistair Francis 
Signed-off-by: Thomas Huth 
---
 hw/dma/Makefile.objs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/dma/Makefile.objs b/hw/dma/Makefile.objs
index 8b39f9c600..b672e7a522 100644
--- a/hw/dma/Makefile.objs
+++ b/hw/dma/Makefile.objs
@@ -8,7 +8,6 @@ common-obj-$(CONFIG_XILINX_AXI) += xilinx_axidma.o
 common-obj-$(CONFIG_ZYNQ_DEVCFG) += xlnx-zynq-devcfg.o
 common-obj-$(CONFIG_ETRAXFS) += etraxfs_dma.o
 common-obj-$(CONFIG_STP2000) += sparc32_dma.o
-obj-$(CONFIG_XLNX_ZYNQMP) += xlnx_dpdma.o
 obj-$(CONFIG_XLNX_ZYNQMP_ARM) += xlnx_dpdma.o
 common-obj-$(CONFIG_XLNX_ZYNQMP_ARM) += xlnx-zdma.o
 
-- 
2.21.0

[Qemu-devel] [PATCH v2 8/8] hw/core: Add a config switch for the generic loader device

The generic loader device is completely optional. Let's add a proper
config switch for it so that people can disable it if they don't need
it and want to create a minimalistic QEMU binary.

Signed-off-by: Thomas Huth 
---
 hw/core/Kconfig   | 4 
 hw/core/Makefile.objs | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/core/Kconfig b/hw/core/Kconfig
index fffb3d62b2..fdf03514d7 100644
--- a/hw/core/Kconfig
+++ b/hw/core/Kconfig
@@ -7,6 +7,10 @@ config PTIMER
 config FITLOADER
 bool
 
+config GENERIC_LOADER
+bool
+default y
+
 config OR_IRQ
 bool
 
diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
index bb1afe422a..b49f880a0c 100644
--- a/hw/core/Makefile.objs
+++ b/hw/core/Makefile.objs
@@ -21,7 +21,7 @@ common-obj-$(CONFIG_REGISTER) += register.o
 common-obj-$(CONFIG_OR_IRQ) += or-irq.o
 common-obj-$(CONFIG_SPLIT_IRQ) += split-irq.o
 common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o
-common-obj-$(CONFIG_SOFTMMU) += generic-loader.o
+common-obj-$(CONFIG_GENERIC_LOADER) += generic-loader.o
 common-obj-$(CONFIG_SOFTMMU) += null-machine.o
 
 obj-$(CONFIG_SOFTMMU) += machine-qmp-cmds.o
-- 
2.21.0

[Qemu-devel] [PATCH v2 4/8] hw/core: Add a config switch for the "register" device

The "register" device is only used by certain machines. Let's add
a proper config switch for it so that it only gets compiled when we
really need it.

Signed-off-by: Thomas Huth 
---
 hw/Kconfig| 1 +
 hw/core/Kconfig   | 3 +++
 hw/core/Makefile.objs | 2 +-
 hw/dma/Kconfig| 1 +
 4 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/hw/Kconfig b/hw/Kconfig
index dbae1c0852..b45db3c813 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -76,3 +76,4 @@ config XILINX_AXI
 
 config XLNX_ZYNQMP
 bool
+select REGISTER
diff --git a/hw/core/Kconfig b/hw/core/Kconfig
index c2a1ae8122..d11920fcb3 100644
--- a/hw/core/Kconfig
+++ b/hw/core/Kconfig
@@ -9,3 +9,6 @@ config FITLOADER
 
 config PLATFORM_BUS
 bool
+
+config REGISTER
+bool
diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
index f8481d959f..d6cfb2a81b 100644
--- a/hw/core/Makefile.objs
+++ b/hw/core/Makefile.objs
@@ -17,7 +17,7 @@ common-obj-$(CONFIG_SOFTMMU) += machine.o
 common-obj-$(CONFIG_SOFTMMU) += loader.o
 common-obj-$(CONFIG_FITLOADER) += loader-fit.o
 common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o
-common-obj-$(CONFIG_SOFTMMU) += register.o
+common-obj-$(CONFIG_REGISTER) += register.o
 common-obj-$(CONFIG_SOFTMMU) += or-irq.o
 common-obj-$(CONFIG_SOFTMMU) += split-irq.o
 common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o
diff --git a/hw/dma/Kconfig b/hw/dma/Kconfig
index 751dec5426..5c61b67bc0 100644
--- a/hw/dma/Kconfig
+++ b/hw/dma/Kconfig
@@ -16,6 +16,7 @@ config I8257
 
 config ZYNQ_DEVCFG
 bool
+select REGISTER
 
 config STP2000
 bool
-- 
2.21.0

[Qemu-devel] [PATCH v2 7/8] hw/misc: Add a config switch for the "unimplemented" device

The device is only used by some few boards. Let's use a proper Kconfig
switch so that we only compile this code if we really need it.

Signed-off-by: Thomas Huth 
---
 hw/arm/Kconfig| 9 +
 hw/microblaze/Kconfig | 1 +
 hw/misc/Kconfig   | 3 +++
 hw/misc/Makefile.objs | 2 +-
 hw/sparc64/Kconfig| 1 +
 5 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 6e24c73b54..ab9e592d74 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -217,6 +217,7 @@ config STELLARIS
 select SSI_SD
 select STELLARIS_INPUT
 select STELLARIS_ENET # ethernet
+select UNIMP
 
 config STRONGARM
 bool
@@ -283,6 +284,7 @@ config ALLWINNER_A10
 select ALLWINNER_A10_PIC
 select ALLWINNER_EMAC
 select SERIAL
+select UNIMP
 
 config RASPI
 bool
@@ -320,6 +322,7 @@ config XLNX_VERSAL
 select PL011
 select CADENCE
 select VIRTIO_MMIO
+select UNIMP
 
 config FSL_IMX25
 bool
@@ -355,6 +358,7 @@ config ASPEED_SOC
 select SSI_M25P80
 select TMP105
 select TMP421
+select UNIMP
 
 config MPS2
 bool
@@ -378,6 +382,7 @@ config FSL_IMX7
 select IMX_I2C
 select PCI_EXPRESS_DESIGNWARE
 select SDHCI
+select UNIMP
 
 config ARM_SMMUV3
 bool
@@ -389,6 +394,7 @@ config FSL_IMX6UL
 select IMX_FEC
 select IMX_I2C
 select SDHCI
+select UNIMP
 
 config MICROBIT
 bool
@@ -398,6 +404,7 @@ config NRF51_SOC
 bool
 select I2C
 select ARM_V7M
+select UNIMP
 
 config EMCRAFT_SF2
 bool
@@ -410,6 +417,7 @@ config MSF2
 select PTIMER
 select SERIAL
 select SSI
+select UNIMP
 
 config ZAURUS
 bool
@@ -448,6 +456,7 @@ config ARMSSE
 select TZ_MPC
 select TZ_MSC
 select TZ_PPC
+select UNIMP
 
 config ARMSSE_CPUID
 bool
diff --git a/hw/microblaze/Kconfig b/hw/microblaze/Kconfig
index c4dc120973..e2697ced9c 100644
--- a/hw/microblaze/Kconfig
+++ b/hw/microblaze/Kconfig
@@ -4,6 +4,7 @@ config PETALOGIX_S3ADSP1800
 select XILINX
 select XILINX_AXI
 select XILINX_ETHLITE
+select UNIMP
 
 config PETALOGIX_ML605
 bool
diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig
index 385e1b0cec..51754bb47c 100644
--- a/hw/misc/Kconfig
+++ b/hw/misc/Kconfig
@@ -117,4 +117,7 @@ config AUX
 bool
 select I2C
 
+config UNIMP
+bool
+
 source macio/Kconfig
diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
index e9aab519a1..e4aad707fb 100644
--- a/hw/misc/Makefile.objs
+++ b/hw/misc/Makefile.objs
@@ -9,7 +9,7 @@ common-obj-$(CONFIG_PCI_TESTDEV) += pci-testdev.o
 common-obj-$(CONFIG_EDU) += edu.o
 common-obj-$(CONFIG_PCA9552) += pca9552.o
 
-common-obj-y += unimp.o
+common-obj-$(CONFIG_UNIMP) += unimp.o
 common-obj-$(CONFIG_FW_CFG_DMA) += vmcoreinfo.o
 
 # ARM devices
diff --git a/hw/sparc64/Kconfig b/hw/sparc64/Kconfig
index d4d76a89be..f9f8b0f73a 100644
--- a/hw/sparc64/Kconfig
+++ b/hw/sparc64/Kconfig
@@ -17,3 +17,4 @@ config NIAGARA
 bool
 select EMPTY_SLOT
 select SUN4V_RTC
+select UNIMP
-- 
2.21.0

Re: [Qemu-devel] [PATCH-4.2 v1 6/6] target/riscv: Fix Floating Point register names

2019-07-31 Thread Chih-Min Chao

On Wed, Jul 31, 2019 at 2:41 AM Alistair Francis 
wrote:

> On Mon, Jul 29, 2019 at 8:19 AM Chih-Min Chao 
> wrote:
> >
> >
> > On Fri, Jul 26, 2019 at 2:56 AM Alistair Francis <
> alistair.fran...@wdc.com> wrote:
> >>
> >> From: Atish Patra 
> >>
> >> As per the RISC-V spec, Floating Point registers are named as f0..f31
> >> so lets fix the register names accordingly.
> >>
> >> Signed-off-by: Atish Patra 
> >> Signed-off-by: Alistair Francis 
> >> ---
> >>  target/riscv/cpu.c | 8 
> >>  1 file changed, 4 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> >> index f8d07bd20a..af1e9b7690 100644
> >> --- a/target/riscv/cpu.c
> >> +++ b/target/riscv/cpu.c
> >> @@ -40,10 +40,10 @@ const char * const riscv_int_regnames[] = {
> >>  };
> >>
> >>  const char * const riscv_fpr_regnames[] = {
> >> -  "ft0", "ft1", "ft2",  "ft3",  "ft4", "ft5", "ft6",  "ft7",
> >> -  "fs0", "fs1", "fa0",  "fa1",  "fa2", "fa3", "fa4",  "fa5",
> >> -  "fa6", "fa7", "fs2",  "fs3",  "fs4", "fs5", "fs6",  "fs7",
> >> -  "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11"
> >> +  "f0", "f1", "f2",  "f3",  "f4", "f5", "f6", "f7",
> >> +  "f8", "f9", "f10",  "f11",  "f12", "f13", "f14", "f15",
> >> +  "f16", "f17", "f18",  "f19",  "f20", "f21", "f22", "f23",
> >> +  "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
> >>  };
> >
> >
> > Could you indicate the section of the spec ?
>
> Chapter 11: "“F” Standard Extension for Single-Precision
> Floating-Point, Version 2.2", section 11.1, Figure 11.1 shows f0 -
> f31.
>
> > By chapter 20 of user spec, the patch changes the floating register name
> to architecture name but leave the integer register use the ABI name.
>
> You mean the Packed-SIMD extension?
>
> Alistair
>

I mean "Chapter 20 RISC-V Assembly Programmer’s Handbook".
There is a table, "Table 20.1: Assembler mnemonics for RISC-V integer and
floating-point registers.", which describes
the architecture name and ABI name for integer and floating-point registers.

By the way, I am referring to riscv-spec-2.2.

chihmin



> >
> > chihmin
> >>
> >>  const char * const riscv_excp_names[] = {
> >> --
> >> 2.22.0
> >>
> >>
>

[Qemu-devel] [PATCH v4 0/1] configure: Define target access alignment in configure

2019-07-31 Thread tony.nguyen

Move the define of target access alignment earlier from
target/foo/cpu.h to configure.

Suggested in Richard Henderson's reply to "[PATCH 1/4] tcg: TCGMemOp
is now accelerator independent MemOp"

Analysed target/foo/cpu.h for more candidates to define earlier but
did not spot any other straightforward predicates.

Possible future clean ups:
- TCG_GUEST_DEFAULT_MO and TCG_TARGET_DEFAULT_MO seems like duplicates
- TARGET_INSN_START_EXTRA_WORDS 1 seems redundant as ifndef value is 1

v2:
- split cosmetic changes into separate patch
- cc corresponding maintainers

v3:
- dropped cosmetic changes
- improved commit message

v4:
- further improved commit message

Tony Nguyen (1):
  configure: Define TARGET_ALIGNED_ONLY in configure

 configure | 10 +-
 include/exec/poison.h |  1 +
 include/qom/cpu.h |  2 +-
 target/alpha/cpu.h|  2 --
 target/hppa/cpu.h |  1 -
 target/mips/cpu.h |  2 --
 target/sh4/cpu.h  |  2 --
 target/sparc/cpu.h|  2 --
 target/xtensa/cpu.h   |  2 --
 tcg/tcg.c |  2 +-
 tcg/tcg.h |  8 +---
 11 files changed, 17 insertions(+), 17 deletions(-)

-- 
2.22.0

[Qemu-devel] [PATCH v4 1/1] configure: Define target access alignment in configure

2019-07-31 Thread tony.nguyen

Rename ALIGNED_ONLY to TARGET_ALIGNED_ONLY for clarity and move
defines out of target/foo/cpu.h into configure, as we do with
TARGET_WORDS_BIGENDIAN, so that it is always defined early.

Poison TARGET_ALIGNED_ONLY to prevent use in common code.

Signed-off-by: Tony Nguyen 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
---
 configure | 10 +-
 include/exec/poison.h |  1 +
 include/qom/cpu.h |  2 +-
 target/alpha/cpu.h|  2 --
 target/hppa/cpu.h |  1 -
 target/mips/cpu.h |  2 --
 target/sh4/cpu.h  |  2 --
 target/sparc/cpu.h|  2 --
 target/xtensa/cpu.h   |  2 --
 tcg/tcg.c |  2 +-
 tcg/tcg.h |  8 +---
 11 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/configure b/configure
index 714e7fb6a1..482ba0b240 100755
--- a/configure
+++ b/configure
@@ -7431,8 +7431,13 @@ for target in $target_list; do
 target_dir="$target"
 config_target_mak=$target_dir/config-target.mak
 target_name=$(echo $target | cut -d '-' -f 1)
+target_aligned_only="no"
+case "$target_name" in
+  
alpha|hppa|mips64el|mips64|mipsel|mips|mipsn32|mipsn32el|sh4|sh4eb|sparc|sparc64|sparc32plus|xtensa|xtensaeb)
+  target_aligned_only="yes"
+  ;;
+esac
 target_bigendian="no"
-
 case "$target_name" in
   
armeb|aarch64_be|hppa|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or1k|ppc|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
   target_bigendian=yes
@@ -7717,6 +7722,9 @@ fi
 if supported_whpx_target $target; then
 echo "CONFIG_WHPX=y" >> $config_target_mak
 fi
+if test "$target_aligned_only" = "yes" ; then
+  echo "TARGET_ALIGNED_ONLY=y" >> $config_target_mak
+fi
 if test "$target_bigendian" = "yes" ; then
   echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
 fi
diff --git a/include/exec/poison.h b/include/exec/poison.h
index b862320fa6..955eb863ab 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -35,6 +35,7 @@
 #pragma GCC poison TARGET_UNICORE32
 #pragma GCC poison TARGET_XTENSA
 
+#pragma GCC poison TARGET_ALIGNED_ONLY
 #pragma GCC poison TARGET_HAS_BFLT
 #pragma GCC poison TARGET_NAME
 #pragma GCC poison TARGET_SUPPORTS_MTTCG
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 5ee0046b62..9b50b73339 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -89,7 +89,7 @@ struct TranslationBlock;
  * @do_unassigned_access: Callback for unassigned access handling.
  * (this is deprecated: new targets should use do_transaction_failed instead)
  * @do_unaligned_access: Callback for unaligned access handling, if
- * the target defines #ALIGNED_ONLY.
+ * the target defines #TARGET_ALIGNED_ONLY.
  * @do_transaction_failed: Callback for handling failed memory transactions
  * (ie bus faults or external aborts; not MMU faults)
  * @virtio_is_big_endian: Callback to return %true if a CPU which supports
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
index b3e8a823e1..16eb8047cf 100644
--- a/target/alpha/cpu.h
+++ b/target/alpha/cpu.h
@@ -23,8 +23,6 @@
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
 
-#define ALIGNED_ONLY
-
 /* Alpha processors have a weak memory model */
 #define TCG_GUEST_DEFAULT_MO  (0)
 
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index aab251bc4b..2be67c289a 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -30,7 +30,6 @@
basis.  It's probably easier to fall back to a strong memory model.  */
 #define TCG_GUEST_DEFAULT_MOTCG_MO_ALL
 
-#define ALIGNED_ONLY
 #define MMU_KERNEL_IDX   0
 #define MMU_USER_IDX 3
 #define MMU_PHYS_IDX 4
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index 21c0615e02..c13cd4eb31 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -1,8 +1,6 @@
 #ifndef MIPS_CPU_H
 #define MIPS_CPU_H
 
-#define ALIGNED_ONLY
-
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
 #include "fpu/softfloat.h"
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index aee733eaaa..ecaa7a18a9 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -23,8 +23,6 @@
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
 
-#define ALIGNED_ONLY
-
 /* CPU Subtypes */
 #define SH_CPU_SH7750  (1 << 0)
 #define SH_CPU_SH7750S (1 << 1)
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
index 8ed2250cd0..1406f0ba2e 100644
--- a/target/sparc/cpu.h
+++ b/target/sparc/cpu.h
@@ -5,8 +5,6 @@
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
 
-#define ALIGNED_ONLY
-
 #if !defined(TARGET_SPARC64)
 #define TARGET_DPREGS 16
 #else
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index 2c277134f1..0459243e6b 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -32,8 +32,6 @@
 #include "exec/cpu-defs.h"
 #include "xtensa-isa.h"
 
-#define ALIGNED_ONLY
-
 /* Xtensa processors have a weak memory model */
 #define TCG_GUEST_DEFAULT_MO  (0)
 
diff --git a/tcg/tcg.c b/tcg/tcg.c
index be2c33c400..8d23fb0592 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1926,7 +1926,7 @@ static const char * const ldst_name[] =
 };
 
 static const char * const alig

Re: [Qemu-devel] [RFC] HACKING: Document 'struct' keyword usage

On 30/07/2019 23.07, Eduardo Habkost wrote:
> Sometimes we use the 'struct' keyword to help us reduce
> dependencies between header files.  Document that practice.
> 
> Signed-off-by: Eduardo Habkost 
> ---
> I wonder if this is too terse?  Should we give examples?
> ---
>  HACKING | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/HACKING b/HACKING
> index 0fc3e0fc04..112685bdaf 100644
> --- a/HACKING
> +++ b/HACKING
> @@ -101,6 +101,8 @@ it points to, or it is aliased to another pointer that is.
>  
>  2.3. Typedefs
>  Typedefs are used to eliminate the redundant 'struct' keyword.
> +However, the 'struct' keyword may be sometimes used in header
> +files to avoid unnecessary dependencies between headers.

See also the discussion earlier this year:

https://www.mail-archive.com/qemu-devel@nongnu.org/msg586180.html

... and we should merge HACKING and CODING_STYLE finally (that was on my
private TODO list, but I never found the time to do it).

 Thomas

Re: [Qemu-devel] [PATCH] spapr: Implement better workaround in spapr-vty device

2019-07-31 Thread David Gibson

On Wed, Jul 31, 2019 at 02:36:54PM +1000, Paul Mackerras wrote:
> Linux guest kernels have code which scans the string of characters
> returned from the H_GET_TERM_CHAR hypercall and removes any \0
> character which comes immediately after a \r character.  This is to
> work around a bug which was present in some ancient versions of
> PowerVM.  In order to avoid the corruption of the console byte stream
> that this introduced, commit 6c3bc244d3cb ("spapr: Implement bug in
> spapr-vty device to be compatible with PowerVM") added a workaround
> which adds a \0 character after every \r character.  Unfortunately,
> this corrupts the console byte stream for those operating systems,
> such as AIX, which don't remove the null bytes.
> 
> We can avoid triggering the Linux kernel workaround if we avoid
> returning a buffer which contains a \0 after a \r.  We can do that by
> breaking out of the loop in vty_getchars() if we are about to insert a
> \0 and the previous character in the buffer is a \r.  That means we
> return the characters up to the \r for the current H_GET_TERM_CHAR,
> and the characters starting with the \0 for the next one.
> 
> With this workaround, we don't insert any spurious characters and we
> avoid triggering the Linux kernel workaround, so the guest will
> receive an uncorrupted stream whether or not they have the workaround.
> 
> Fixes: 6c3bc244d3cb ("spapr: Implement bug in spapr-vty device to be 
> compatible with PowerVM")
> Signed-off-by: Paul Mackerras 

Applied to ppc-for-4.2, thanks.

> ---
>  hw/char/spapr_vty.c | 30 --
>  1 file changed, 12 insertions(+), 18 deletions(-)
> 
> diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c
> index 617303dbaf..65a7c2ffbd 100644
> --- a/hw/char/spapr_vty.c
> +++ b/hw/char/spapr_vty.c
> @@ -57,25 +57,19 @@ static int vty_getchars(SpaprVioDevice *sdev, uint8_t 
> *buf, int max)
>  int n = 0;
>  
>  while ((n < max) && (dev->out != dev->in)) {
> -buf[n++] = dev->buf[dev->out++ % VTERM_BUFSIZE];
> -
> -/* PowerVM's vty implementation has a bug where it inserts a
> - * \0 after every \r going to the guest.  Existing guests have
> - * a workaround for this which removes every \0 immediately
> - * following a \r, so here we make ourselves bug-for-bug
> - * compatible, so that the guest won't drop a real \0-after-\r
> - * that happens to occur in a binary stream. */
> -if (buf[n - 1] == '\r') {
> -if (n < max) {
> -buf[n++] = '\0';
> -} else {
> -/* No room for the extra \0, roll back and try again
> - * next time */
> -dev->out--;
> -n--;
> -break;
> -}
> +/*
> + * Long ago, PowerVM's vty implementation had a bug where it
> + * inserted a \0 after every \r going to the guest.  Existing
> + * guests have a workaround for this which removes every \0
> + * immediately following a \r.  To avoid triggering this
> + * workaround, we stop before inserting a \0 if the preceding
> + * character in the output buffer is a \r.
> + */
> +if (n > 0 && (buf[n - 1] == '\r') &&
> +(dev->buf[dev->out % VTERM_BUFSIZE] == '\0')) {
> +break;
>  }
> +buf[n++] = dev->buf[dev->out++ % VTERM_BUFSIZE];
>  }
>  
>  qemu_chr_fe_accept_input(&dev->chardev);

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] When to use qemu/typedefs.h

On 31/07/2019 08.37, Markus Armbruster wrote:
> Paolo Bonzini  writes:
> 
>> On 30/07/19 15:15, Eric Blake wrote:
>>>> We occasionally give up and use types directly rather than their typedef
>>>> names, flouting the coding style.  This patch does.  Trades messing with
>>>> qemu/typedefs.h for having to write 'struct' a few times.
>>
>> I think Markus made the right call here.  Using "struct Foo;" in headers
>> is a null price to pay if all you need is declaring a pointer-typed
>> field or parameter.
> 
> Eduardo posted a patch to HACKING to clarify this non-usage of typedef
> is okay.
> 
> Should we continue to mandate typedef names elsewhere?  It adds
> cognitive load: you have to decide where to put the typedef, and when
> not to use it.

IMHO we should get rid of mandating typedefs. They are causing too much
trouble - e.g. do you also remember the issues with duplicated typedefs
in certain compiler versions in the past? (these should be hopefully
gone now, but still...)

And many QEMU developers are also working on the Linux kernel, which
rather forbids typedefs. Having to switch your mind back and forth
whether to use typedefs or not is really annoying.

So if you ask me, stop mandating it! It's ok as optional feature in QEMU
for types that are used all over the place, but we really should not
enforce it for each and every struct anymore.

 Thomas

[Qemu-devel] [PATCH] numa: numa nodeid need not be sequential

2019-07-31 Thread Daniel Black

Replace all nodeid assumptions with lookups in the numa_info array and
remove aspects that assume a sequential numbering of nodes. This enables
non-sequential NUMA node number topologies to be created.

Default assignments of CPU->nodeid (get_default_cpu_node_id) now return
a nodeid from the numa_info array.

x86 will use the node id as the Proximity Domain (which the
Linux kernel will map down to sequential node numbers). In Linux kernel
output look at the RAT: and PXM: references in the kernel early boot.

Small enhancements were made to error messages to be more explicit
about errors in node specification.

Signed-off-by: Daniel Black 
---

Below is a TCG test script that covers ppc64le/x86_64/aarch64 architectures:

Uncomment run* lines as needed. Manually complete steps at end to
verify:

Caution, brutal killalls contained within.

#!/bin/bash
set -x -v

QEMUHOME=${HOME}/repos/qemu/
# optional but make it easy to install/run numactl --hardware
#ALPINE_NET=""
ALPINE_NET="ip=dhcp alpine_repo=http://dl-cdn.alpinelinux.org/alpine/edge/main/";

ALPINE_HOME=${HOME}/repos/alpine/alpine-netboot-3.10.1-

# x86 / armv7 - no CONFIG_NUMA=y support in kernel
# Kernel configs: https://git.alpinelinux.org/aports/tree/main/linux-vanilla/
# s390x - no numa support in QEMU
for ARCH in x86_64 aarch64 ppc64le
do
 if [ ! -d ${ALPINE_HOME}${ARCH} ]
 then
   mkdir ${ALPINE_HOME}${ARCH}
   wget 
http://dl-cdn.alpinelinux.org/alpine/v3.10/releases/${ARCH}/alpine-netboot-3.10.1-${ARCH}.tar.gz
 -O - | tar -zxf - -C ${ALPINE_HOME}${ARCH}
 fi
done

if [ ! -x ${ALPINE_HOME}i386 ]
then
  ln -s ${ALPINE_HOME}x86 ${ALPINE_HOME}i386
fi

if [ ! -x ${ALPINE_HOME}arm ]
then
  ln -s ${ALPINE_HOME}armv7 ${ALPINE_HOME}arm
fi

if [ ! -x ${ALPINE_HOME}ppc64 ]
then
  ln -s ${ALPINE_HOME}ppc64le ${ALPINE_HOME}ppc64
fi

# Note "virtual" kernels don't have numa enabled
run()
{
  NUMA=$1
  ARCH=$2
  ARGS=$3
  CONSOLE=$4
  ${QEMUHOME}/${ARCH}-softmmu/qemu-system-${ARCH} \
${ARGS} \
-kernel ${ALPINE_HOME}${ARCH}/boot/vmlinuz-vanilla  \
-initrd ${ALPINE_HOME}${ARCH}/boot/initramfs-vanilla \
-append "${CONSOLE} ${ALPINE_NET}" \
-m 2G \
${NUMA}
  echo
}

# This ends up as odd:
# ends up with both CPUs are on same node
# as 0 and 8 % 2 (nodes) are the same
# in short - don't run legacy with gaps with
# odd numa node numbers (like 0 and 8).
run_legacy()
{
  run "-smp 2,cores=3,sockets=2,maxcpus=6 \
   -numa node,mem=1G \
   -numa node,mem=1G,nodeid=8 \
   -numa dist,src=0,dst=8,val=21" "$@"
}

run_memdev_implicit_core()
{
  run "-smp cpus=6,maxcpus=8,cores=4,sockets=2 \
   -object memory-backend-ram,id=ram0,size=1G \
   -object memory-backend-ram,id=ram1,size=1G \
   -numa node,memdev=ram0,nodeid=0 \
   -numa node,memdev=ram1,nodeid=8 \
   -numa dist,src=0,dst=8,val=21" "$@"
}

run_memdev_explicit_core()
{
  run "-smp cpus=6,maxcpus=8,cores=4,sockets=2 \
   -object memory-backend-ram,id=ram0,size=1G \
   -object memory-backend-ram,id=ram1,size=1G \
   -numa node,memdev=ram0,cpus=0-3,nodeid=0 \
   -numa node,memdev=ram1,cpus=4-7,nodeid=8 \
   -numa dist,src=0,dst=8,val=21" "$@"
}

for arch in x86_64 ppc64 aarch64 s390x; do killall qemu-system-$arch; done
killall vncviewer

# i386 Alpine kernels don't have NUMA
#run_memdev_implicit_core i386 "-machine pc -nographic" console=ttyS0
# armv7 kernel's don't have NUMA
#run_legacy arm "-machine virt -cpu cortex-a15 -nographic" console=ttyAMA0

# GOOD
# Arguments after the function name: architecture, extra QEMU options,
# kernel console argument. Each line is one NUMA configuration.
run_legacy x86_64 "-machine pc -nographic" console=ttyS0
run_memdev_implicit_core x86_64 "-machine pc -nographic" console=ttyS0
run_memdev_explicit_core x86_64 "-machine pc -nographic" console=ttyS0

# GOOD
#run_legacy aarch64 "-machine virt -cpu cortex-a57 -nographic" console=ttyAMA0
#run_memdev_implicit_core  aarch64 "-machine virt -cpu cortex-a57 -nographic" console=ttyAMA0
#run_memdev_explicit_core  aarch64 "-machine virt -cpu cortex-a57 -nographic" console=ttyAMA0

# PPC not doing numa distance (not a regression)
#(sleep 1; vncviewer :0) &

# GOOD
# run_legacy ppc64 "-machine pseries -cpu POWER9 -display vnc=:0" "numa=debug"
# run_memdev_implicit_core ppc64 "-machine pseries -cpu POWER9 -display vnc=:0" "numa=debug"
# run_memdev_explicit_core ppc64 "-machine pseries -cpu POWER9 -display vnc=:0" "numa=debug"

# ON P8 ppc64le host:
# run_memdev_implicit_core ppc64 "-machine pseries -cpu host -accel kvm -display vnc=:0" "numa=debug"

# Couldn't boot Alpine ARM kernel on this machine type:
# arm sbsa ref - appears to be a BMC so not really a numa target?
# seems ok looking at the results of sbsa_ref_get_default_cpu_node_id; however,
# it displays no output when booting

# run_legacy aarch64 "-machine sbsa-ref -nographic" console=ttyAMA0

# Then run:
# sh -c 'apk add numactl-tools && numactl --hardware'
#
# alternately examine results in:
# ls -la /sys/devices/system/node/node*/cpu*
# more /sys/devices/system/node/node*/distance
#
# x86 node numbers are renumbered by kernel. To view
# acpi

Re: [Qemu-devel] [PATCH for-4.2 02/13] qcow2: Keep unknown extra snapshot data

On 30.07.19 19:56, Eric Blake wrote:
> On 7/30/19 12:24 PM, Max Reitz wrote:
>> The qcow2 specification says to ignore unknown extra data fields in
>> snapshot table entries.  Currently, we discard it whenever we update the
>> image, which is a bit different from "ignore".
>>
>> This patch makes the qcow2 driver keep all unknown extra data fields
>> when updating an image's snapshot table.
> 
> The cover letter questioned whether we want this, but I think we do.
> 
>>
>> Signed-off-by: Max Reitz 
>> ---
>>  block/qcow2.h  |  5 
>>  block/qcow2-snapshot.c | 59 +++---
>>  2 files changed, 55 insertions(+), 9 deletions(-)
>>
>> diff --git a/block/qcow2.h b/block/qcow2.h
>> index 175708cee0..290a48b77e 100644
>> --- a/block/qcow2.h
>> +++ b/block/qcow2.h
>> @@ -61,6 +61,9 @@
>>   * space for snapshot names and IDs */
>>  #define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
>>  
>> +/* Maximum amount of extra data per snapshot table entry to accept */
>> +#define QCOW_MAX_SNAPSHOT_EXTRA_DATA 1024
>> +
>>  /* Bitmap header extension constraints */
>>  #define QCOW2_MAX_BITMAPS 65535
>>  #define QCOW2_MAX_BITMAP_DIRECTORY_SIZE (1024 * QCOW2_MAX_BITMAPS)
>> @@ -178,6 +181,8 @@ typedef struct QCowSnapshot {
>>  uint32_t date_sec;
>>  uint32_t date_nsec;
>>  uint64_t vm_clock_nsec;
>> +uint32_t extra_data_size;
>> +void *unknown_extra_data; /* Extra data past QCowSnapshotExtraData */
> 
> Is char* going to be any easier to use than void*?
> 
>> +++ b/block/qcow2-snapshot.c
> 
>> @@ -80,30 +80,52 @@ int qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
> 
>> +
>> +/* Read known extra data */
>>  ret = bdrv_pread(bs->file, offset, &extra,
>> - MIN(sizeof(extra), extra_data_size));
>> + MIN(sizeof(extra), sn->extra_data_size));
>>  if (ret < 0) {
>>  error_setg_errno(errp, -ret, "Failed to read snapshot table");
>>  goto fail;
>>  }
>> -offset += extra_data_size;
>> +offset += MIN(sizeof(extra), sn->extra_data_size);
>>  
>> -if (extra_data_size >= 8) {
>> +if (sn->extra_data_size >= 8) {
> 
> While touching this, is it worth spelling it:
> if (sn->extra_data_size >= sizeof(extra.vm_state_size_large)) {
> 
>>  sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
>>  }
>>  
>> -if (extra_data_size >= 16) {
>> +if (sn->extra_data_size >= 16) {
> 
> and a similar use of sizeof() instead of hard-coded 16 here?

Well, the most verbose spelling would be offsetof() + sizeof() both
times.  Hm.  I’ll see how it looks.  I think it is obvious enough what
it means as it is, and the full offsetof() + sizeof() might actually be
less obvious just because it’s longer and thus takes longer to read.

>>  sn->disk_size = be64_to_cpu(extra.disk_size);
>>  } else {
>>  sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
>>  }
>>  
>> +if (sn->extra_data_size > sizeof(extra)) {
>> +/* Store unknown extra data */
>> +size_t unknown_extra_data_size =
>> +sn->extra_data_size - sizeof(extra);
>> +
>> +sn->unknown_extra_data = g_malloc(unknown_extra_data_size);
>> +ret = bdrv_pread(bs->file, offset, sn->unknown_extra_data,
>> + unknown_extra_data_size);
> 
> We're doing two separate bdrv_pread()s. Would it be better to do a
> single bdrv_preadv into a vector composed of &extra and
> &unknown_extra_data, for less I/O?  (Then again, this micro-optimization
> is probably in the noise in the long run)

Interesting idea, we could even add the ID and name string into that
vector.  But I’m not sure whether it’s really useful.

(I’ll take a look anyway, because it sounds interesting.)

>> +if (ret < 0) {
>> +error_setg_errno(errp, -ret, "Failed to read snapshot table");
>> +goto fail;
>> +}
>> +offset += unknown_extra_data_size;
>> +}
>> +
>>  /* Read snapshot ID */
>>  sn->id_str = g_malloc(id_str_size + 1);
>>  ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
>> @@ -161,7 +183,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
>>  sn = s->snapshots + i;
>>  offset = ROUND_UP(offset, 8);
>>  offset += sizeof(h);
>> -offset += sizeof(extra);
>> +offset += MAX(sizeof(extra), sn->extra_data_size);
>>  offset += strlen(sn->id_str);
>>  offset += strlen(sn->name);
>>  
>> @@ -208,7 +230,8 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
>>  h.date_sec = cpu_to_be32(sn->date_sec);
>>  h.date_nsec = cpu_to_be32(sn->date_nsec);
>>  h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
>> -h.extra_data_size = cpu_to_be32(sizeof(extra));
>> +

Re: [Qemu-devel] [PATCH for-4.2 05/13] qcow2: Write v3-compliant snapshot list on upgrade

On 30.07.19 20:10, Eric Blake wrote:
> On 7/30/19 12:25 PM, Max Reitz wrote:
>> qcow2 v3 requires every snapshot table entry to have two extra data
>> fields: The 64-bit VM state size, and the virtual disk size.  Both are
>> optional for v2 images, so they may not be present.
>>
>> qcow2_upgrade() therefore should update the snapshot table to ensure all
>> entries have these extra data fields.
>>
>> Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1727347
>> Reported-by: Eric Blake 
>> Signed-off-by: Max Reitz 
>> ---
>>  block/qcow2.c | 29 +++--
>>  1 file changed, 27 insertions(+), 2 deletions(-)
>>
> 
>> +
>> +/*
>> + * In v2, snapshots do not need to have extra data.  v3 requires
>> + * the 64-bit VM state size and the virtual disk size to be
>> + * present.
>> + * qcow2_write_snapshots() will always write the list in the
>> + * v3-compliant format.
>> + */
>> +need_snapshot_update = false;
>> +for (i = 0; i < s->nb_snapshots; i++) {
>> +if (s->snapshots[i].extra_data_size < 16) {
> 
> s/16/sizeof(extra)/ looks a bit nicer, but doesn't change semantics.

Hm, but it’s not quite right.  I mean, right now it is, but if we were
to add a new field to snapshot metadata, it wouldn’t be.  It should be 16.

I can make it something like sizeof(extra.vm_state_size_large) +
sizeof(extra.disk_size), though.

Max

> Reviewed-by: Eric Blake 




signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH for-4.2 06/13] qcow2: Separate qcow2_check_read_snapshot_table()

On 30.07.19 20:53, Eric Blake wrote:
> On 7/30/19 12:25 PM, Max Reitz wrote:
>> Reading the snapshot table can fail.  That is a problem when we want to
>> repair the image.
>>
>> Therefore, stop reading the snapshot table in qcow2_do_open() in check
>> mode.  Instead, add a new function qcow2_check_read_snapshot_table()
>> that reads the snapshot table at a later point.  In the future, we want
>> to handle errors here and fix them.
>>
>> Signed-off-by: Max Reitz 
>> ---
>>  block/qcow2.h  |  4 +++
>>  block/qcow2-snapshot.c | 58 
>>  block/qcow2.c  | 76 --
>>  3 files changed, 120 insertions(+), 18 deletions(-)
>>
> 
>> +++ b/block/qcow2-snapshot.c
>> @@ -321,6 +321,64 @@ fail:
>>  return ret;
>>  }
>>  
>> +int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
>> + BdrvCheckResult *result,
>> + BdrvCheckMode fix)
>> +{
>> +BDRVQcow2State *s = bs->opaque;
>> +Error *local_err = NULL;
>> +int ret;
>> +struct {
>> +uint32_t nb_snapshots;
>> +uint64_t snapshots_offset;
>> +} QEMU_PACKED snapshot_table_pointer;
>> +
>> +/* qcow2_do_open() discards this information in check mode */
>> +ret = bdrv_pread(bs->file, 60, &snapshot_table_pointer,
>> + sizeof(snapshot_table_pointer));
> 
> Should that '60' be a named constant or offsetof() expression?  (I know,
> you just copied this instance from elsewhere)

Well, I copied it from the specification. O:-)

You’re completely right.  It should be offsetof(QCowHeader, nb_snapshots).

(I blame the fact that I had started writing the test by this point, so
I was already immersed in so many magic numbers.)

> Reviewed-by: Eric Blake 




signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 3/3] i386/kvm: initialize struct at full before ioctl call

2019-07-31 Thread Christophe de Dinechin



Christian Borntraeger writes:

> On 30.07.19 18:44, Philippe Mathieu-Daudé wrote:
>> On 7/30/19 6:01 PM, Andrey Shinkevich wrote:
>>> Not the whole structure is initialized before passing it to the KVM.
>>> Reduce the number of Valgrind reports.
>>>
>>> Signed-off-by: Andrey Shinkevich 
>>> ---
>>>  target/i386/kvm.c | 3 +++
>>>  1 file changed, 3 insertions(+)
>>>
>>> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
>>> index dbbb137..ed57e31 100644
>>> --- a/target/i386/kvm.c
>>> +++ b/target/i386/kvm.c
>>> @@ -190,6 +190,7 @@ static int kvm_get_tsc(CPUState *cs)
>>>  return 0;
>>>  }
>>>
>>> +memset(&msr_data, 0, sizeof(msr_data));
>>
>> I wonder about the overhead of this one...
>
> Can't we use designated initializers like in
>
> commit bdfc8480c50a53d91aa9a513d23a84de0d5fbc86
> Author: Christian Borntraeger 
> AuthorDate: Thu Oct 30 09:23:41 2014 +0100
> Commit: Paolo Bonzini 
> CommitDate: Mon Dec 15 12:21:01 2014 +0100
>
> valgrind/i386: avoid false positives on KVM_SET_XCRS ioctl
>
> and others?
>
> This should minimize the impact.

Oh, when you talked about using designated initializers, I thought you
were talking about fully initializing the struct, like so:

diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index dbbb13772a..3533870c43 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -180,19 +180,20 @@ static int kvm_get_tsc(CPUState *cs)
 {
 X86CPU *cpu = X86_CPU(cs);
 CPUX86State *env = &cpu->env;
-struct {
-struct kvm_msrs info;
-struct kvm_msr_entry entries[1];
-} msr_data;
 int ret;

 if (env->tsc_valid) {
 return 0;
 }

-msr_data.info.nmsrs = 1;
-msr_data.entries[0].index = MSR_IA32_TSC;
-env->tsc_valid = !runstate_is_running();
+struct {
+struct kvm_msrs info;
+struct kvm_msr_entry entries[1];
+} msr_data = {
+.info = { .nmsrs =  1 },
+.entries = { [0] = { .index = MSR_IA32_TSC } }
+};
+ env->tsc_valid = !runstate_is_running();

 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
 if (ret < 0) {


This gives the compiler maximum opportunity to flag mistakes like
initializing the same thing twice, and makes it easier (read: no smart
optimizations needed) to initialize in one go. Moving the declaration past
the 'if' also addresses Philippe's concern.

>>
>>>  msr_data.info.nmsrs = 1;
>>>  msr_data.entries[0].index = MSR_IA32_TSC;
>>>  env->tsc_valid = !runstate_is_running();
>>> @@ -1706,6 +1707,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
>>>
>>>  if (has_xsave) {
>>>  env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
>>> +memset(env->xsave_buf, 0, sizeof(struct kvm_xsave));
>>
>> OK
>>
>>>  }
>>>
>>>  max_nested_state_len = kvm_max_nested_state_length();
>>> @@ -3477,6 +3479,7 @@ static int kvm_put_debugregs(X86CPU *cpu)
>>>  return 0;
>>>  }
>>>
>>> +memset(&dbgregs, 0, sizeof(dbgregs));
>>
>> OK
>>
>>>  for (i = 0; i < 4; i++) {
>>>  dbgregs.db[i] = env->dr[i];
>>>  }
>>
>> We could remove 'dbgregs.flags = 0;'
>>
>> Reviewed-by: Philippe Mathieu-Daudé 
>>


--
Cheers,
Christophe de Dinechin (IRC c3d)

Re: [Qemu-devel] [PATCH for-4.2 v10 11/15] virtio-iommu: Expose the IOAPIC MSI reserved region when relevant

2019-07-31 Thread Auger Eric

Hi Kevin, Michael,

On 7/31/19 1:20 AM, Tian, Kevin wrote:
>> From: Michael S. Tsirkin [mailto:m...@redhat.com]
>> Sent: Wednesday, July 31, 2019 3:38 AM
>>
>> On Tue, Jul 30, 2019 at 07:21:33PM +0200, Eric Auger wrote:
>>> We introduce a new msi_bypass field which indicates whether
>>> the IOAPIC MSI window [0xFEE00000 - 0xFEEFFFFF] must be exposed
> 
> it's not good to call it IOAPIC MSI window. any write to this range, either
> from IOAPIC or PCI device, is interpreted by the platform as interrupt
> request. I'd call it "x86 interrupt address range".
Thank you for the clarification. I will reword the commit message as
suggested.
> 
>>> as a reserved region. By default the field is set to true at
>>> instantiation time. Later on we will introduce a property at
>>> virtio pci proxy level to turn it off.
>>>
>>> Signed-off-by: Eric Auger 
>>>
>>> ---
>>>
>>> v8 -> v9:
>>> - pass IOAPIC_RANGE_END to virtio_iommu_register_resv_region
>>> - take into account the change in the struct virtio_iommu_probe_resv_mem
>>>   definition
>>> - We just introduce the field here. A property will be introduced later on
>>>   at pci proxy level.
>>> ---
>>>  hw/virtio/virtio-iommu.c | 36 
>>>  include/hw/virtio/virtio-iommu.h |  1 +
>>>  2 files changed, 37 insertions(+)
>>>
>>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>>> index 66be9a4627..74038288b0 100644
>>> --- a/hw/virtio/virtio-iommu.c
>>> +++ b/hw/virtio/virtio-iommu.c
>>> @@ -39,6 +39,9 @@
>>>  #define VIOMMU_DEFAULT_QUEUE_SIZE 256
>>>  #define VIOMMU_PROBE_SIZE 512
>>>
>>> +#define IOAPIC_RANGE_START  (0xfee00000)
>>> +#define IOAPIC_RANGE_END    (0xfeefffff)
>>> +
>>>  #define SUPPORTED_PROBE_PROPERTIES (\
>>>  1 << VIRTIO_IOMMU_PROBE_T_RESV_MEM)
>>>
>>
>> Sorry where are these numbers coming from?
> 
> this is architecturally defined in x86 SDM.
> 
>> Does this really work on all platforms?
> 
> x86 only. 
Yes, the initial goal was to allow the x86 integration. Maybe I should allow
the machine to pass reserved regions as device properties instead.

As integration with pc/q35 is beyond the scope of this initial series,
maybe I should remove that patch?

Thanks

Eric
> 
>> With all guests?
> 
> yes.
> 
>>
>>> @@ -100,6 +103,30 @@ static void
>> virtio_iommu_detach_endpoint_from_domain(viommu_endpoint *ep)
>>>  ep->domain = NULL;
>>>  }
>>>
>>> +static void virtio_iommu_register_resv_region(viommu_endpoint *ep,
>>> +  uint8_t subtype,
>>> +  uint64_t start, uint64_t end)
>>> +{
>>> +viommu_interval *interval;
>>> +struct virtio_iommu_probe_resv_mem *resv_reg_prop;
>>> +size_t prop_size = sizeof(struct virtio_iommu_probe_resv_mem);
>>> +size_t value_size = prop_size -
>>> +sizeof(struct virtio_iommu_probe_property);
>>> +
>>> +interval = g_malloc0(sizeof(*interval));
>>> +interval->low = start;
>>> +interval->high = end;
>>> +
>>> +resv_reg_prop = g_malloc0(prop_size);
>>> +resv_reg_prop->head.type = VIRTIO_IOMMU_PROBE_T_RESV_MEM;
>>> +resv_reg_prop->head.length = cpu_to_le64(value_size);
>>> +resv_reg_prop->subtype = cpu_to_le64(subtype);
>>> +resv_reg_prop->start = cpu_to_le64(start);
>>> +resv_reg_prop->end = cpu_to_le64(end);
>>> +
>>> +g_tree_insert(ep->reserved_regions, interval, resv_reg_prop);
>>> +}
>>> +
>>>  static viommu_endpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>>>uint32_t ep_id)
>>>  {
>>> @@ -117,6 +144,12 @@ static viommu_endpoint
>> *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>>>  ep->reserved_regions =
>> g_tree_new_full((GCompareDataFunc)interval_cmp,
>>>  NULL, (GDestroyNotify)g_free,
>>>  (GDestroyNotify)g_free);
>>> +if (s->msi_bypass) {
>>> +virtio_iommu_register_resv_region(ep,
>> VIRTIO_IOMMU_RESV_MEM_T_MSI,
>>> +  IOAPIC_RANGE_START,
>>> +  IOAPIC_RANGE_END);
>>> +}
>>> +
>>>  return ep;
>>>  }
>>>
>>> @@ -822,6 +855,9 @@ static void virtio_iommu_set_status(VirtIODevice
>> *vdev, uint8_t status)
>>>
>>>  static void virtio_iommu_instance_init(Object *obj)
>>>  {
>>> +VirtIOIOMMU *s = VIRTIO_IOMMU(obj);
>>> +
>>> +s->msi_bypass = true;
>>>  }
>>>
>>>  static const VMStateDescription vmstate_virtio_iommu = {
>>> diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-
>> iommu.h
>>> index f55f48d304..56c8b4e57f 100644
>>> --- a/include/hw/virtio/virtio-iommu.h
>>> +++ b/include/hw/virtio/virtio-iommu.h
>>> @@ -59,6 +59,7 @@ typedef struct VirtIOIOMMU {
>>>  GTree *domains;
>>>  QemuMutex mutex;
>>>  GTree *endpoints;
>>> +bool msi_bypass;
>>>  } VirtIOIOMMU;
>>>
>>>  #endif
>>> --
>>> 2.20.1

Re: [Qemu-devel] [PATCH for-4.2 08/13] qcow2: Fix broken snapshot table entries

On 30.07.19 21:02, Eric Blake wrote:
> On 7/30/19 12:25 PM, Max Reitz wrote:
>> The only case where we currently reject snapshot table entries is when
>> they have too much extra data.  Fix them with qemu-img check -r all by
>> counting it as a corruption, reducing their extra_data_size, and then
>> letting qcow2_check_fix_snapshot_table() do the rest.
>>
>> Signed-off-by: Max Reitz 
>> ---
>>  block/qcow2-snapshot.c | 69 ++
>>  1 file changed, 56 insertions(+), 13 deletions(-)
>>
> 
>> @@ -112,16 +141,22 @@ int qcow2_read_snapshots(BlockDriverState *bs, Error 
>> **errp)
>>  }
>>  
>>  if (sn->extra_data_size > sizeof(extra)) {
>> -/* Store unknown extra data */
>>  size_t unknown_extra_data_size =
>>  sn->extra_data_size - sizeof(extra);
>>  
>> -sn->unknown_extra_data = g_malloc(unknown_extra_data_size);
>> -ret = bdrv_pread(bs->file, offset, sn->unknown_extra_data,
>> - unknown_extra_data_size);
>> -if (ret < 0) {
>> -error_setg_errno(errp, -ret, "Failed to read snapshot 
>> table");
>> -goto fail;
>> +if (discard_unknown_extra_data) {
>> +/* Discard unknown extra data */
>> +sn->extra_data_size = sizeof(extra);
> 
> This truncates it down to just the data we know. Should it instead
> truncate down to the 1024 bytes of QCOW_MAX_SNAPSHOT_EXTRA_DATA defined
> in 2/13?  (We can't keep all of the user's extra stuff, but we can at
> least try to preserve as much as possible)

On one hand, potentially cutting unknown data in half sounds like not
such a good idea to me.

On the other, a field can only be considered present if it is fully
present.  So cutting any optional data in half shouldn’t have any
negative impact.

So, yes, truncating it down to 1024 bytes sounds good.

Max

> Otherwise, looks good.
> Reviewed-by: Eric Blake 
> 




signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v3 04/33] make Device and Bus Resettable




On 7/31/19 7:56 AM, David Gibson wrote:
> On Mon, Jul 29, 2019 at 04:56:25PM +0200, Damien Hedde wrote:
>> This adds a Resettable interface implementation for both Bus and Device.
>>
>> *resetting* counter and *reset_is_cold* flag are added in DeviceState
>> and BusState.
>>
>> Compatibility with existing code base is ensured.
>> The legacy bus or device reset method is called in the new exit phase
>> and the other 2 phases are left empty. Using the exit phase guarantees that
>> legacy resets are called in the "post" order (i.e. children then parent)
>> in a hierarchical reset. That is the same order that the legacy
>> qdev_reset_all or qbus_reset_all were using.
>>
>> New *device_reset* and *bus_reset* functions are proposed with an
>> additional boolean argument telling whether the reset is cold or warm.
>> Helper functions *device_reset_[warm|cold]* and *bus_reset_[warm|cold]*
>> are also defined.
>>
>> Also add [device|bus]_is_resetting and [device|bus]_is_reset_cold
>> functions telling respectively whether the object is currently under reset
>> and whether the current reset is cold or not.
>>
>> Signed-off-by: Damien Hedde 
>> ---
>>  hw/core/bus.c  | 85 ++
>>  hw/core/qdev.c | 82 
>>  include/hw/qdev-core.h | 84 ++---
>>  tests/Makefile.include |  1 +
>>  4 files changed, 247 insertions(+), 5 deletions(-)
>>
>> diff --git a/hw/core/bus.c b/hw/core/bus.c
>> index 17bc1edcde..08a97addb6 100644
>> --- a/hw/core/bus.c
>> +++ b/hw/core/bus.c
>> @@ -22,6 +22,7 @@
>>  #include "qemu/module.h"
>>  #include "hw/qdev.h"
>>  #include "qapi/error.h"
>> +#include "hw/resettable.h"
>>  
>>  void qbus_set_hotplug_handler(BusState *bus, Object *handler, Error **errp)
>>  {
>> @@ -68,6 +69,75 @@ int qbus_walk_children(BusState *bus,
>>  return 0;
>>  }
>>  
>> +void bus_reset(BusState *bus, bool cold)
>> +{
>> +resettable_reset(OBJECT(bus), cold);
>> +}
>> +
>> +bool bus_is_resetting(BusState *bus)
>> +{
>> +return (bus->resetting != 0);
>> +}
>> +
>> +bool bus_is_reset_cold(BusState *bus)
>> +{
>> +return bus->reset_is_cold;
>> +}
>> +
>> +static uint32_t bus_get_reset_count(Object *obj)
>> +{
>> +BusState *bus = BUS(obj);
>> +return bus->resetting;
>> +}
>> +
>> +static uint32_t bus_increment_reset_count(Object *obj)
>> +{
>> +BusState *bus = BUS(obj);
>> +return ++bus->resetting;
>> +}
>> +
>> +static uint32_t bus_decrement_reset_count(Object *obj)
>> +{
>> +BusState *bus = BUS(obj);
>> +return --bus->resetting;
>> +}
>> +
>> +static bool bus_set_reset_cold(Object *obj, bool cold)
>> +{
>> +BusState *bus = BUS(obj);
>> +bool old = bus->reset_is_cold;
>> +bus->reset_is_cold = cold;
>> +return old;
>> +}
>> +
>> +static bool bus_set_hold_needed(Object *obj, bool hold_needed)
>> +{
>> +BusState *bus = BUS(obj);
>> +bool old = bus->reset_hold_needed;
>> +bus->reset_hold_needed = hold_needed;
>> +return old;
>> +}
>> +
>> +static void bus_foreach_reset_child(Object *obj, void (*func)(Object *))
>> +{
>> +BusState *bus = BUS(obj);
>> +BusChild *kid;
>> +
>> +QTAILQ_FOREACH(kid, &bus->children, sibling) {
>> +func(OBJECT(kid->child));
>> +}
>> +}
> 
> IIUC, every resettable class would need more or less identical
> implementations of the above.  That seems like an awful lot of
> boilerplate.

Do you mean the get/increment_count/decrement_count, set_cold/hold part?
True, but it's limited to the base classes.
Since Resettable is an interface, we have no state there to store what
we need. The only alternative is to have some kind of single
get_resettable_state method returning a pointer to the state (allowing
us to keep the functions in the interface code).
Beyond Device and Bus, which are done here, there are probably not
many candidate classes for the Resettable interface.

Damien

[Qemu-devel] [Bug 1837049] Re: qemu-system-ppc segfaults with -display sdl

2019-07-31 Thread Andrew Randrianasulu

Hello, Richard!
No, the same bug was biting me without any specific options. I tried to add
-Og for better debugging, but the backtrace was still not complete ... I think
I can live with the -display gtk workaround for now.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1837049

Title:
  qemu-system-ppc segfaults with -display sdl

Status in QEMU:
  New

Bug description:
  Hello.

  I was trying to debug this segfault:
  https://lists.nongnu.org/archive/html/qemu-ppc/2019-07/msg00186.html

  I recompiled latest qemu from git (commit 
0b18cfb8f1828c905139b54c8644b0d8f4aad879 ), using this configure line:
  ./configure --target-list=i386-softmmu,x86_64-softmmu,ppc-softmmu 
--audio-drv-list=alsa --disable-werror --extra-cflags="-Og" --enable-debug-tcg

  after this I tried original line under gdb, it was still segfaulting:

  --copy-
  gdb ./ppc-softmmu/qemu-system-ppc
  GNU gdb (GDB) 7.11.1
  Copyright (C) 2016 Free Software Foundation, Inc.
  License GPLv3+: GNU GPL version 3 or later 
  This is free software: you are free to change and redistribute it.
  There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
  and "show warranty" for details.
  This GDB was configured as "i586-slackware-linux".
  Type "show configuration" for configuration details.
  For bug reporting instructions, please see:
  .
  Find the GDB manual and other documentation resources online at:
  .
  For help, type "help".
  Type "apropos word" to search for commands related to "word"...
  Reading symbols from ./ppc-softmmu/qemu-system-ppc...done.
  warning: File "/dev/shm/qemu/.gdbinit" auto-loading has been declined by your 
`auto-load safe-path' set to "$debugdir:$datadir/auto-load".
  To enable execution of this file add
  add-auto-load-safe-path /dev/shm/qemu/.gdbinit
  line to your configuration file "/home/guest/.gdbinit".
  To completely disable this security protection add
  set auto-load safe-path /
  line to your configuration file "/home/guest/.gdbinit".
  For more information about this security protection see the
  "Auto-loading safe path" section in the GDB manual.  E.g., run from the shell:
  info "(gdb)Auto-loading safe path"
  (gdb) run  -M mac99,via=pmu -L ../queue-vga/pc-bios -cdrom 
/mnt/sdb1/PPC-img/lubuntu-16.04-desktop-powerpc.iso -m 512 -display sdl,gl=on 
-vga std -d guest_errors,unimp -boot d -cpu G4 -g 1024x768x24 -device ES1370
  Starting program: /dev/shm/qemu/ppc-softmmu/qemu-system-ppc -M mac99,via=pmu 
-L ../queue-vga/pc-bios -cdrom 
/mnt/sdb1/PPC-img/lubuntu-16.04-desktop-powerpc.iso -m 512 -display sdl,gl=on 
-vga std -d guest_errors,unimp -boot d -cpu G4 -g 1024x768x24 -device ES1370
  [Thread debugging using libthread_db enabled]
  Using host libthread_db library "/lib/libthread_db.so.1".
  [New Thread 0xf560cb40 (LWP 8100)]
  [New Thread 0xf4c1ab40 (LWP 8101)]
  [New Thread 0xec1b7b40 (LWP 8102)]
  [New Thread 0xc5821b40 (LWP 8104)]
  [Thread 0xf4c1ab40 (LWP 8101) exited]
  [New Thread 0xf4c1ab40 (LWP 8119)]

  Thread 4 "qemu-system-ppc" received signal SIGSEGV, Segmentation fault.
  [Switching to Thread 0xec1b7b40 (LWP 8102)]
  0xf26c2e44 in code_gen_buffer ()
  (gdb) bt full
  #0  0x in code_gen_buffer ()
  #1  0x56710cf6 in cpu_exec (itb=, cpu=) at 
/dev/shm/qemu/accel/tcg/cpu-exec.c:173
  env = 
  ret = 
  last_tb = 
  tb_exit = 
  tb_ptr = 0xf26c2cc0  "‹]ш…Ы\017ЊБ\020"
  ret = 0
  insns_left = 
  cflags = 
  tb = 0x5722fe58
  last_tb = 
  tb_exit = 
  cc = 
  __func__ = "cpu_exec"
  ret = 
  sc = 
  #2  0x56710cf6 in cpu_exec (tb_exit=, last_tb=, tb=, cpu=) at 
/dev/shm/qemu/accel/tcg/cpu-exec.c:621
  ret = 0
  insns_left = 
  cflags = 
  tb = 0x5722fe58
  last_tb = 
  tb_exit = 
  cc = 
  __func__ = "cpu_exec"
  ret = 
  sc = 
  #3  0x56710cf6 in cpu_exec (cpu=0x573db8f8) at 
/dev/shm/qemu/accel/tcg/cpu-exec.c:732
  cflags = 
  tb = 0x5722fe58
  last_tb = 
  tb_exit = 
  cc = 
  __func__ = "cpu_exec"
  ret = 
  sc = 
  #4  0x566cfade in tcg_cpu_exec (cpu=0x573db8f8) at /dev/shm/qemu/cpus.c:1435
  ret = 
  #5  0x566d1e6d in qemu_tcg_rr_cpu_thread_fn (arg=0x573db8f8) at 
/dev/shm/qemu/cpus.c:1537
  r = 
  cpu = 0x573db8f8
  __PRETTY_FUNCTION__ = "qemu_tcg_rr_cpu_thread_fn"
  #6  0x56b56fe0 in qemu_thread_start (args=0x57400668) at 
util/qemu-thread-posix.c:502
  __cancel_buf = {__cancel_jmp_buf = {{__cancel_jmp_buf = {1461911128, 
1463813736, 1461911128, -333745816, 247778263, 1392237730}, __mask_was_saved = 
0}},

Re: [Qemu-devel] [PATCH] numa: numa nodeid need not be sequential

2019-07-31 Thread Daniel Black




On Wed, 31 Jul 2019 18:40:33 +1000
Daniel Black  wrote:

> Replace all nodeid assumptions with lookups in the numa_info array

Tao Xu,

Sorry, I only just noticed your patches that move numa_info and other
elements into MachineState. I approve of it. Let's see if I can follow
all of Igor's comments there too.

I'll be up for a V2 once merged.

Cheers,

Daniel

[Qemu-devel] [PATCH v5] net: tap: replace snprintf with g_strdup_printf calls

2019-07-31 Thread P J P

From: Prasad J Pandit 

When invoking qemu-bridge-helper in 'net_bridge_run_helper',
instead of using fixed sized buffers, use dynamically allocated
ones initialised and returned by g_strdup_printf().

If bridge name 'br_buf' is undefined, pass empty string ("") to
g_strdup_printf() in its place, to avoid printing "(null)" string.

Signed-off-by: Prasad J Pandit 
---
 net/tap.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

Update v5: add commit message about conditional 'br_buf' argument
  -> https://lists.gnu.org/archive/html/qemu-devel/2019-07/msg06397.html

diff --git a/net/tap.c b/net/tap.c
index e8aadd8d4b..fc38029f41 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -498,9 +498,9 @@ static int net_bridge_run_helper(const char *helper, const char *bridge,
 }
 if (pid == 0) {
 int open_max = sysconf(_SC_OPEN_MAX), i;
-char fd_buf[6+10];
-char br_buf[6+IFNAMSIZ] = {0};
-char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15];
+char *fd_buf = NULL;
+char *br_buf = NULL;
+char *helper_cmd = NULL;
 
 for (i = 3; i < open_max; i++) {
 if (i != sv[1]) {
@@ -508,17 +508,17 @@ static int net_bridge_run_helper(const char *helper, const char *bridge,
 }
 }
 
-snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]);
+fd_buf = g_strdup_printf("%s%d", "--fd=", sv[1]);
 
 if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
 /* assume helper is a command */
 
 if (strstr(helper, "--br=") == NULL) {
-snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
+br_buf = g_strdup_printf("%s%s", "--br=", bridge);
 }
 
-snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s",
- helper, "--use-vnet", fd_buf, br_buf);
+helper_cmd = g_strdup_printf("%s %s %s %s", helper,
+"--use-vnet", fd_buf, br_buf ? br_buf : "");
 
 parg = args;
 *parg++ = (char *)"sh";
@@ -527,10 +527,11 @@ static int net_bridge_run_helper(const char *helper, const char *bridge,
 *parg++ = NULL;
 
 execv("/bin/sh", args);
+g_free(helper_cmd);
 } else {
 /* assume helper is just the executable path name */
 
-snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
+br_buf = g_strdup_printf("%s%s", "--br=", bridge);
 
 parg = args;
 *parg++ = (char *)helper;
@@ -541,6 +542,8 @@ static int net_bridge_run_helper(const char *helper, const char *bridge,
 
 execv(helper, args);
 }
+g_free(fd_buf);
+g_free(br_buf);
 _exit(1);
 
 } else {
-- 
2.21.0

Re: [Qemu-devel] [PATCH for-4.2 09/13] qcow2: Fix overly long snapshot tables

On 30.07.19 21:08, Eric Blake wrote:
> On 7/30/19 12:25 PM, Max Reitz wrote:
>> We currently refuse to open qcow2 images with overly long snapshot
>> tables.  This patch makes qemu-img check -r all drop all offending
>> entries past what we deem acceptable.
>>
>> Signed-off-by: Max Reitz 
>> ---
>>  block/qcow2-snapshot.c | 89 +-
>>  1 file changed, 79 insertions(+), 10 deletions(-)
> 
> I'm less sure about this one.  8/13 should have no semantic effect (if
> the user _depended_ on that much extra data, they should have set an
> incompatible feature flag bit, at which point we'd leave their data
> alone because we don't recognize the feature bit; so it is safe to
> assume the user did not depend on the data and that we can thus nuke it
> with impunity).  But here, we are throwing away the user's internal
> snapshots, and not even giving them a say in which ones to throw away
> (more likely, by trimming from the end, we are destroying the most
> recent snapshots in favor of the older ones - but I could argue that
> throwing away the oldest also has its uses).

First, I don’t think there really is a legitimate use case for having an
overly long snapshot table.  In fact, I think our limit is too high as
it is and we just introduced it this way because we didn’t have any
repair functionality, and so just had to pick some limit that nobody
could ever reasonably reach.

(As the test shows, you need more than 500 snapshots with 64 kB names
and ID strings, and 1 kB of extra data to reach this limit.)

So the only likely cause to reach this number of snapshots is
corruption.  OK, so maybe we don’t need to be able to fix it, then,
because the image is corrupted anyway.

But I think we do want to be able to fix it, because otherwise you just
can’t open the image at all and thus not even read the active layer.

This gets me to: Second, it doesn’t make things worse.  Right now, we
just refuse to open such images in all cases.  I’d personally prefer
discarding some data on my image over losing it all.

And third, I wonder what interface you have in mind.  I think adding an
interface to qemu-img check to properly address this problem (letting
the user discard individual snapshots) is hard.  I could imagine two things:

(A) Making qemu-img snapshot sometimes set BDRV_O_CHECK, too, or
something.  For qemu-img snapshot -d, you don’t need to read the whole
table into memory, and thus we don’t need to impose any limit.  But that
seems pretty hackish to me.

(B) Maybe the proper solution would be to add an interactive interface
to bdrv_check().  I can imagine that in the future, we may get more
cases where we want interaction with the user on what data to delete and
so on.  But that's hard...  (I’ll try.  Good thing stdio is already the
standard interface in bdrv_check(), so I won’t have to feel bad if I go
down that route even further.)

Max

>> @@ -417,7 +461,32 @@ int coroutine_fn 
>> qcow2_check_read_snapshot_table(BlockDriverState *bs,
>>  
>>  return ret;
>>  }
>> -result->corruptions += extra_data_dropped;
>> +result->corruptions += nb_clusters_reduced + extra_data_dropped;
>> +
>> +if (nb_clusters_reduced) {
>> +/*
>> + * Update image header now, because:
>> + * (1) qcow2_check_refcounts() relies on s->nb_snapshots to be
>> + * the same as what the image header says,
>> + * (2) this leaks clusters, but qcow2_check_refcounts() will
>> + * fix that.
>> + */
>> +assert(fix & BDRV_FIX_ERRORS);
>> +
>> +snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots);
>> +ret = bdrv_pwrite_sync(bs->file, 60,
> 
> That '60' needs a name; it keeps popping up.
> 
> If we like the patch, I didn't spot major coding problems.  But because
> I'm not sure we want this patch, I'll skip R-b for now.
> 

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v4 3/3] net: tap: replace snprintf with g_strdup_printf calls

2019-07-31 Thread P J P

+-- On Wed, 31 Jul 2019, Jason Wang wrote --+
| The series has been merged. Just need a patch on top and I can queue it for 
| next release.

Sent patch v5. Thank you.
--
Prasad J Pandit / Red Hat Product Security Team
47AF CE69 3A90 54AA 9045 1053 DD13 3D32 FE5B 041F

Re: [Qemu-devel] [PATCH for-4.2 10/13] qcow2: Repair snapshot table with too many entries

On 30.07.19 21:10, Eric Blake wrote:
> On 7/30/19 12:25 PM, Max Reitz wrote:
>> Signed-off-by: Max Reitz 
>> ---
>>  block/qcow2-snapshot.c | 14 ++
>>  1 file changed, 14 insertions(+)
>>
> 
> Same problem as for 9/13 - should we really be throwing away the user's
> data like this?  (9/13 hits if the user has a small number of snapshots,
> but each has enough extra data, that the overall table is bigger than we
> like; 10/13 hits if the user has more snapshots than we like, but
> otherwise they do the same thing).

The same arguments apply (though the “it must be a corruption” argument
applies even more, because having more than 65536 snapshots just isn’t
right.)

Max



signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [Bug 1838475] Re: qemu-system-arm exits when cortex-m4 floating point used and irq occurs

** Tags added: arm mprofile tcg

** Tags added: testcase

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1838475

Title:
  qemu-system-arm exits when cortex-m4 floating point used and irq
  occurs

Status in QEMU:
  New

Bug description:
  qemu-system-arm exits with

  "...Secure UsageFault with CFSR.NOCP because NSACR.CP10 prevents stacking FP 
regs
  ...taking pending nonsecure exception 3
  Taking exception 7 [Breakpoint]
  qemu: fatal: Lockup: can't escalate 3 to HardFault (current priority -1)" 

  when emulating Cortex-m4, executing at least 1 floating point
  instruction, and then an irq (e.g. sys tick) occurring.

  CPACR.CP10 and CPACR.CP11 are set to 0x3 respectively prior to
  executing the fp instructions.

  NOTE: NSACR does not appear to be a cortex m4 register.

  Attached is a simplified elf to repro the issue.

  The qemu command line is: "qemu-system-arm --gdb tcp::1234 -cpu
  cortex-m4 -machine lm3s6965evb -nographic -semihosting-config
  enable=on,target=native -kernel QemuExitWhenUsingFPAndIRQOccurs.elf -d
  int"

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1838475/+subscriptions

Re: [Qemu-devel] [PATCH for-4.2 12/13] iotests: Add peek_file* functions

On 30.07.19 21:22, Eric Blake wrote:
> On 7/30/19 12:25 PM, Max Reitz wrote:
>> Signed-off-by: Max Reitz 
>> ---
>>  tests/qemu-iotests/common.rc | 20 
>>  1 file changed, 20 insertions(+)
>>
>> diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
>> index 5502c3da2f..78decfd5d5 100644
>> --- a/tests/qemu-iotests/common.rc
>> +++ b/tests/qemu-iotests/common.rc
>> @@ -53,6 +53,26 @@ poke_file()
>>  printf "$3" | dd "of=$1" bs=1 "seek=$2" conv=notrunc &>/dev/null
>>  }
>>  
>> +# peek_file_le 'test.img' 512 2 => 65534
>> +peek_file_le()
>> +{
>> +# Wrap in echo $() to strip spaces
>> +echo $(od -j"$2" -N"$3" --endian=little -An -vtu"$3" "$1")
> 
> Requires coreutils' od, but we can patch that later if it proves to be a
> problem on other hosts.
> 
> I used to do something similar in nbdkit (prior to qemu-nbd --list
> making my life a lot easier; see nbdkit commit b228cb40); but there, I
> read a 16-bit value in 2 8-bit chunks and pieced it together myself
> rather than relying on --endian:
> 
> -# Protocol is big endian, we want native endian.
> -# xargs trick to trim whitespace from
> -# https://stackoverflow.com/a/12973694
> -eflags_hi=$(od -An -N1 -tx1 eflags.out | xargs)
> -eflags_lo=$(od -An -N1 -j1 -tx1 eflags.out | xargs)
> -eflags=$(( 0x$eflags_hi << 8 | 0x$eflags_lo ))
> 
> But as long as we are using --endian, your version nicely handles 1, 2,
> 4, and 8-byte reads.
> 
>> +
>> +# peek_file_raw 'test.img' 512 2 => '\xff\xfe'
>> +peek_file_raw()
>> +{
>> +dd if="$1" bs=1 skip="$2" count="$3" status=none
>> +}
> 
> Of course, calling $(peek_file_raw ...) is a bad idea, because it might
> eat a trailing byte that happened to be a newline; it also doesn't
> handle NUL bytes very well.  Is it worth documenting caveats for using
> this one?

In my experience, it handled NUL bytes so well that I had to tr -d them
away. :-)

I mean, isn’t the problem in the caller, then?

Max

> Reviewed-by: Eric Blake 



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v3 05/33] Switch to new api in qdev/bus




On 7/31/19 8:05 AM, David Gibson wrote:
> On Mon, Jul 29, 2019 at 04:56:26PM +0200, Damien Hedde wrote:
>> Deprecate old reset apis and make them use the new one while they
>> are still used somewhere.
>>
>> Signed-off-by: Damien Hedde 
>> ---
>>  hw/core/qdev.c | 22 +++---
>>  include/hw/qdev-core.h | 28 ++--
>>  2 files changed, 25 insertions(+), 25 deletions(-)
>>
>> diff --git a/hw/core/qdev.c b/hw/core/qdev.c
>> index 559ced070d..e9e5f2d5f9 100644
>> --- a/hw/core/qdev.c
>> +++ b/hw/core/qdev.c
>> @@ -312,25 +312,9 @@ static void device_foreach_reset_child(Object *obj, 
>> void (*func)(Object *))
>>  }
>>  }
>>  
>> -static int qdev_reset_one(DeviceState *dev, void *opaque)
>> -{
>> -device_legacy_reset(dev);
>> -
>> -return 0;
>> -}
>> -
>> -static int qbus_reset_one(BusState *bus, void *opaque)
>> -{
>> -BusClass *bc = BUS_GET_CLASS(bus);
>> -if (bc->reset) {
>> -bc->reset(bus);
>> -}
>> -return 0;
>> -}
>> -
>>  void qdev_reset_all(DeviceState *dev)
>>  {
>> -qdev_walk_children(dev, NULL, NULL, qdev_reset_one, qbus_reset_one, 
>> NULL);
>> +device_reset(dev, false);
>>  }
>>  
>>  void qdev_reset_all_fn(void *opaque)
>> @@ -340,7 +324,7 @@ void qdev_reset_all_fn(void *opaque)
>>  
>>  void qbus_reset_all(BusState *bus)
>>  {
>> -qbus_walk_children(bus, NULL, NULL, qdev_reset_one, qbus_reset_one, 
>> NULL);
>> +bus_reset(bus, false);
>>  }
>>  
>>  void qbus_reset_all_fn(void *opaque)
>> @@ -922,7 +906,7 @@ static void device_set_realized(Object *obj, bool value, 
>> Error **errp)
>>  }
>>  }
>>  if (dev->hotplugged) {
>> -device_legacy_reset(dev);
>> +device_reset(dev, true);
> 
> So.. is this change in the device_reset() signature really necessary?
> Even if there are compelling reasons to handle warm reset in the new
> API, that doesn't mean you need to change device_reset() itself from
> its established meaning of a cold (i.e. as per power cycle) reset.
> Warm resets are generally called in rather more specific circumstances
> (often under guest software direction) so it seems likely that users
> would want to engage with the new reset API directly.  Or we could
> just create a device_warm_reset() wrapper.  That would also avoid the
> bare boolean parameter, which is not great for readability (you have
> to look up the signature to have any idea what it means).

I've added device_reset_cold/warm wrapper functions to avoid having to
pass the boolean parameter. It seems I forgot to use them in qdev.c.
I suppose, like you said, we could live with
+ no function with the boolean parameter
+ device_reset doing cold reset
+ device_reset_warm (or device_warm_reset) for the warm version

Damien

[Qemu-devel] [Bug 1838465] Re: qemu-system-x86_64 kernel panic 30% of the time starting up VM

Is this using TCG (i.e. emulation) rather than Hyper V virtualisation?

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1838465

Title:
  qemu-system-x86_64 kernel panic 30% of the time starting up VM

Status in QEMU:
  New

Bug description:
  I have created a Fedora Core 5 x86_64 VM image. When I run the image
  using QEMU on Windows the VM hangs while loading the kernel about 30%
  of the time. I am trying to use this VM with a CI software, looking at
  the history the build failed 27 out of 79 attempts. QEMU 3.0.0 is
  installed on the CI machine. I have tried using the exact same image
  using QEMU on Linux (Ubuntu) and found the image boot successful every
  time (40+ attempts). The VM image is fairly old it was created using
  QEMU 0.11.1.

  I have tried multiple versions of QEMU on Windows: 0.11.1, 2.12.1, and
  3.0.0 all of them fail randomly. I can reproduce the issue on several
  different Windows 10 computers.

  The command I am using to start the VM is “qemu-system-x86_64.exe -cpu
  qemu64 -smp cores=2 -device e1000,netdev=net0 -boot menu=off -m 1G
  -drive `"file=C:\qimages\Fedora-Core-5-x64.qcow2,index=0,media=disk`"
  -snapshot -netdev user,id=net0,hostfwd=tcp::10022-:22”

  I can provide the qcow image but it is somewhat large, coming in at
  4.15GB so I’m not sure what would be the best way to transfer it.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1838465/+subscriptions

[Qemu-devel] [Bug 1837851] Re: hv-tlbflush malfunctions on Intel host CPUs with neither EPT nor VPID (qemu-kvm)

** Tags added: kvm x86

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1837851

Title:
  hv-tlbflush malfunctions on Intel host CPUs with neither EPT nor VPID
  (qemu-kvm)

Status in QEMU:
  New

Bug description:
  Enabling hv-tlbflush on older hosts using Intel CPUs supporting VT-x
  but neither EPT nor VPID will lead to bluescreens on the guest.

  It seems KVM only checks if EPT is available, and if it isn't it
  forcibly uses VPID. If that's *also* not available, it defaults to
  basically a no-op hypercall, though windows is expecting the TLB to be
  flushed.

  hv-tlbflush is pretty useless on machines not supporting these
  extensions anyway (only reasonable fix I can see would be to flush the
  *entire* TLB on tlbflush hypercall in KVM (i.e. a kernel fix), but
  that would remove any performance benefits), so I would suggest some
  kind of preliminary check and warning/error if hv-tlbflush is
  specified on such a host.

  All CPUs mentioned in this thread[0] are confirmed to be affected by
  the bug, and I have successfully reproduced it on an Intel Core2Duo
  E8500.

  [0] https://forum.proxmox.com/threads/windows-guest-bluescreen-with-
  proxmox-6.56053/

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1837851/+subscriptions

[Qemu-devel] [Bug 1837347] Re: guest userspace process core dump after raspi2 kernel boot

** Tags added: tcg

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1837347

Title:
  guest userspace process core dump after raspi2 kernel boot

Status in QEMU:
  New

Bug description:
  Host info:
  ==
  x86-64, Ubuntu 18.04, QEMU 4.0.0 (downloaded tarball from main site)

  Guest info:
  ===
  ARM7l, Raspbian OS off the main raspberry pi site

  QEMU command:
  =
  qemu-system-arm -M raspi2 -kernel bootpart/kernel7.img -dtb 
bootpart/bcm2709-rpi-2-b.dtb -drive 
file=2019-07-10-raspbian-buster.img,format=raw,if=sd -append "rw earlyprintk 
console=ttyAMA0,115200 fsck.repair=yes rootwait memtest=1 loglevel=8 
dwc_otg.lpm_enable=0 root=/dev/mmcblk0p2" -serial stdio

  kernel7.img and bcm2709-rpi-2-b.dtb were obtained by the following
  commands:

  guestfish --ro -a 2019-07-10-raspbian-buster.img -m /dev/sda1
  > copy-out / bootpart/
  > quit

  Output:
  ===

  https://pastebin.com/fL1eXhV0

  References:
  ===
  
https://translatedcode.wordpress.com/2016/11/03/installing-debian-on-qemus-32-bit-arm-virt-board/
  
https://translatedcode.wordpress.com/2018/04/25/debian-on-qemus-raspberry-pi-3-model/

  
  The core dump can occur both before and after logging in; in this case
  I have given the output after logging in to show the initial processes
  running.

  Also, please let me know if I am using any kernel flags incorrectly.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1837347/+subscriptions

[Qemu-devel] [Bug 1836501] Re: cpu_address_space_init fails with assertion

** Tags added: kvm

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1836501

Title:
  cpu_address_space_init fails with assertion

Status in QEMU:
  Confirmed

Bug description:
  qemu-system-arm does not start with version >= 2.6 and KVM enabled.

cpu_address_space_init: Assertion `asidx == 0 || !kvm_enabled()'
  failed.

  Hardware is Odroid XU4 with Exynos with 4.9.61+ Tested with Debian
  Stretch (9) or Buster (10).

  Without KVM it is running fine but slow. I'm operating Debian Jessie
  with qemu 2.1 for a long time with KVM virtualization working
  flawlessly. When I upgraded to Stretch I ran into the trouble
  described before. I tried Debian Stretch and Buster with all Kernels
  provided by the Board manufacturer (Hardkernel).

  It seems to be related to the feature introduced in Version 2.6:
  https://wiki.qemu.org/ChangeLog/2.6
  - Support for a separate EL3 address space

  KVM is enabled, so I assume the address space index asidx to be causing
  the assert to fail.

  dmesg | grep -i KVM
  [0.741714] kvm [1]: 8-bit VMID
  [0.741721] kvm [1]: IDMAP page: 40201000
  [0.741729] kvm [1]: HYP VA range: c000:
  [0.742543] kvm [1]: Hyp mode initialized successfully
  [0.742600] kvm [1]: vgic-v2@10484000
  [0.742924] kvm [1]: vgic interrupt IRQ16
  [0.742943] kvm [1]: virtual timer IRQ60

  Full command line is:
  qemu-system-arm -M vexpress-a15 -smp 2 -m 512 -cpu host -enable-kvm -kernel 
vmlinuz -initrd initrd.gz -dtb vexpress-v2p-ca15-tc1.dtb -device 
virtio-blk-device,drive=inst-blk -drive 
file=PATHTOFILE,id=inst-blk,if=none,format=raw -append "vga=normal rw 
console=ttyAMA0" -nographic

  Is there anything to do to understand, if this is a hardware related
  failure or probably just a missing parameter?

  Regards

  Lutz

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1836501/+subscriptions

Re: [Qemu-devel] [PATCH v5] net: tap: replace snprintf with g_strdup_printf calls

2019-07-31 Thread no-reply

Patchew URL: https://patchew.org/QEMU/20190731091933.17363-1-ppan...@redhat.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Subject: [Qemu-devel] [PATCH v5] net: tap: replace snprintf with 
g_strdup_printf calls
Message-id: 20190731091933.17363-1-ppan...@redhat.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag] patchew/20190731091933.17363-1-ppan...@redhat.com -> 
patchew/20190731091933.17363-1-ppan...@redhat.com
Submodule 'capstone' (https://git.qemu.org/git/capstone.git) registered for 
path 'capstone'
Submodule 'dtc' (https://git.qemu.org/git/dtc.git) registered for path 'dtc'
Submodule 'roms/QemuMacDrivers' (https://git.qemu.org/git/QemuMacDrivers.git) 
registered for path 'roms/QemuMacDrivers'
Submodule 'roms/SLOF' (https://git.qemu.org/git/SLOF.git) registered for path 
'roms/SLOF'
Submodule 'roms/edk2' (https://git.qemu.org/git/edk2.git) registered for path 
'roms/edk2'
Submodule 'roms/ipxe' (https://git.qemu.org/git/ipxe.git) registered for path 
'roms/ipxe'
Submodule 'roms/openbios' (https://git.qemu.org/git/openbios.git) registered 
for path 'roms/openbios'
Submodule 'roms/openhackware' (https://git.qemu.org/git/openhackware.git) 
registered for path 'roms/openhackware'
Submodule 'roms/opensbi' (https://git.qemu.org/git/opensbi.git) registered for 
path 'roms/opensbi'
Submodule 'roms/qemu-palcode' (https://git.qemu.org/git/qemu-palcode.git) 
registered for path 'roms/qemu-palcode'
Submodule 'roms/seabios' (https://git.qemu.org/git/seabios.git/) registered for 
path 'roms/seabios'
Submodule 'roms/seabios-hppa' (https://git.qemu.org/git/seabios-hppa.git) 
registered for path 'roms/seabios-hppa'
Submodule 'roms/sgabios' (https://git.qemu.org/git/sgabios.git) registered for 
path 'roms/sgabios'
Submodule 'roms/skiboot' (https://git.qemu.org/git/skiboot.git) registered for 
path 'roms/skiboot'
Submodule 'roms/u-boot' (https://git.qemu.org/git/u-boot.git) registered for 
path 'roms/u-boot'
Submodule 'roms/u-boot-sam460ex' (https://git.qemu.org/git/u-boot-sam460ex.git) 
registered for path 'roms/u-boot-sam460ex'
Submodule 'slirp' (https://git.qemu.org/git/libslirp.git) registered for path 
'slirp'
Submodule 'tests/fp/berkeley-softfloat-3' 
(https://git.qemu.org/git/berkeley-softfloat-3.git) registered for path 
'tests/fp/berkeley-softfloat-3'
Submodule 'tests/fp/berkeley-testfloat-3' 
(https://git.qemu.org/git/berkeley-testfloat-3.git) registered for path 
'tests/fp/berkeley-testfloat-3'
Submodule 'ui/keycodemapdb' (https://git.qemu.org/git/keycodemapdb.git) 
registered for path 'ui/keycodemapdb'
Cloning into 'capstone'...
Submodule path 'capstone': checked out 
'22ead3e0bfdb87516656453336160e0a37b066bf'
Cloning into 'dtc'...
Submodule path 'dtc': checked out '88f18909db731a627456f26d779445f84e449536'
Cloning into 'roms/QemuMacDrivers'...
Submodule path 'roms/QemuMacDrivers': checked out 
'90c488d5f4a407342247b9ea869df1c2d9c8e266'
Cloning into 'roms/SLOF'...
Submodule path 'roms/SLOF': checked out 
'ba1ab360eebe6338bb8d7d83a9220ccf7e213af3'
Cloning into 'roms/edk2'...
Submodule path 'roms/edk2': checked out 
'20d2e5a125e34fc8501026613a71549b2a1a3e54'
Submodule 'SoftFloat' (https://github.com/ucb-bar/berkeley-softfloat-3.git) 
registered for path 'ArmPkg/Library/ArmSoftFloatLib/berkeley-softfloat-3'
Submodule 'CryptoPkg/Library/OpensslLib/openssl' 
(https://github.com/openssl/openssl) registered for path 
'CryptoPkg/Library/OpensslLib/openssl'
Cloning into 'ArmPkg/Library/ArmSoftFloatLib/berkeley-softfloat-3'...
Submodule path 'roms/edk2/ArmPkg/Library/ArmSoftFloatLib/berkeley-softfloat-3': 
checked out 'b64af41c3276f97f0e181920400ee056b9c88037'
Cloning into 'CryptoPkg/Library/OpensslLib/openssl'...
Submodule path 'roms/edk2/CryptoPkg/Library/OpensslLib/openssl': checked out 
'50eaac9f3337667259de725451f201e784599687'
Submodule 'boringssl' (https://boringssl.googlesource.com/boringssl) registered 
for path 'boringssl'
Submodule 'krb5' (https://github.com/krb5/krb5) registered for path 'krb5'
Submodule 'pyca.cryptography' (https://github.com/pyca/cryptography.git) 
registered for path 'pyca-cryptography'
Cloning into 'boringssl'...
Submodule path 'roms/edk2/CryptoPkg/Library/OpensslLib/openssl/boringssl': 
checked out '2070f8ad9151dc8f3a73bffaa146b5e6937a583f'
Cloning into 'krb5'...
Submodule path 'roms/edk2/CryptoPkg/Library/OpensslLib/openssl/krb5': checked 
out 'b9ad6c49505c96a088326b62a52568e3484f2168'
Cloning into 'pyca-cryptography'...
Submodule path 
'roms/edk2/CryptoPkg/Library/OpensslLib/openssl/pyca-cryptography': checked out 
'09403100de2f6f1cdd0d484dcb8e620f1c335c8f'
Cloning into 'roms/ipxe'...

Re: [Qemu-devel] [PATCH for-4.2 13/13] iotests: Test qcow2's snapshot table handling

On 30.07.19 21:56, Eric Blake wrote:
> On 7/30/19 12:25 PM, Max Reitz wrote:
>> Add a test how our qcow2 driver handles extra data in snapshot table
>> entries, and how it repairs overly long snapshot tables.
> 
> May need tweaking if we drop 9 and 10.
> 
>>
>> Signed-off-by: Max Reitz 
>> ---
>>  tests/qemu-iotests/261 | 449 +
>>  tests/qemu-iotests/261.out | 321 ++
>>  tests/qemu-iotests/group   |   1 +
>>  3 files changed, 771 insertions(+)
>>  create mode 100755 tests/qemu-iotests/261
>>  create mode 100644 tests/qemu-iotests/261.out
>>
>> +
>> +# Parameters:
>> +#   $1: image filename
>> +#   $2: snapshot table entry offset in the image
>> +snapshot_table_entry_size()
>> +{
>> +id_len=$(peek_file_be "$1" $(($2 + 12)) 2)
>> +name_len=$(peek_file_be "$1" $(($2 + 14)) 2)
>> +extra_len=$(peek_file_be "$1" $(($2 + 36)) 4)
>> +
>> +full_len=$((40 + extra_len + id_len + name_len))
>> +if [ $((full_len % 8)) = 0 ]; then
>> +echo $full_len
>> +else
>> +echo $((full_len + 8 - full_len % 8))
> 
> Could replace the entire if with:
>  echo $(( (full_len + 7) / 8 * 8 ))
> but what you have works.

Ah, sure.

>> +fi
>> +}
>> +
>> +# Parameter:
>> +#   $1: image filename
>> +print_snapshot_table()
>> +{
>> +nb_entries=$(peek_file_be "$1" 60 4)
>> +offset=$(peek_file_be "$1" 64 8)
>> +
>> +echo "Snapshots in $1:" | _filter_testdir | _filter_imgfmt
> 
> Should a separate patch add support in 'qemu-img info'/'qemu-img
> snapshot -l' for letting users know how much extra info is in each
> snapshot?  It seems useful enough without having to recode this
> low-level iotest introspection.

To me, it doesn’t seem really useful right now, as all qemu-created
images (past 1.1) will have the same 16 bytes.

>> +
>> +for ((i = 0; i < nb_entries; i++)); do
>> +id_len=$(peek_file_be "$1" $((offset + 12)) 2)
>> +name_len=$(peek_file_be "$1" $((offset + 14)) 2)
>> +extra_len=$(peek_file_be "$1" $((offset + 36)) 4)
>> +
>> +extra_ofs=$((offset + 40))
>> +id_ofs=$((extra_ofs + extra_len))
>> +name_ofs=$((id_ofs + id_len))
>> +
>> +echo "  [$i]"
>> +echo "ID: $(peek_file_raw "$1" $id_ofs $id_len)"
>> +echo "Name: $(peek_file_raw "$1" $name_ofs $name_len)"
> 
> We're relying on the files having sane strings at those offsets - but
> that's fine for the iotest.
> 
>> +echo "Extra data size: $extra_len"
>> +if [ $extra_len -ge 8 ]; then
>> +echo "VM state size: $(peek_file_be "$1" $extra_ofs 8)"
>> +fi
>> +if [ $extra_len -ge 16 ]; then
>> +echo "Disk size: $(peek_file_be "$1" $((extra_ofs + 8)) 8)"
>> +fi
>> +if [ $extra_len -gt 16 ]; then
>> +echo 'Unknown extra data:' \
>> +"$(peek_file_raw "$1" $((extra_ofs + 16)) $((extra_len - 
>> 16)) \
>> +   | tr -d '\0')"
> 
> Printing the unknown extra data seems fishy, especially if you are going
> to sanitize out the NUL bytes.  An od dump of every byte might be more
> useful, but I'd also be happy with just printing the number of unknown
> bytes without actually worrying about printing the contents of those bytes.

It’s a test, I know exactly what the extra data is (supposed to be).

(namely “very important data\0\0\0\0\0\0\0”)

[...]

>> +# We only need the fixed part, though.
>> +truncate -s 40 "$TEST_DIR/sn0"
>> +
>> +# 65535-char ID string
>> +poke_file "$TEST_DIR/sn0" 12 '\xff\xff'
>> +# 65535-char name
>> +poke_file "$TEST_DIR/sn0" 14 '\xff\xff'
> 
> Do we care that there are NUL bytes in the id and name?  (The spec is
> clear that id and name are not NUL-terminated, but does not actually
> seem to forbid the use of arbitrary binary values as names...)

Right now we don’t care.  Which is good for me, because anything else
would make this test even slower than it already is (writing a different
name and ID into every snapshot would be a pain).

(It’s even worse for the next case.  There is a reason I do it for v2
only, where fully-zero snapshot table entries are valid.  It takes a
long time just to write a '16' into every one of >65536 entries.)

Max

[...]

> Overall, looks like a nice test.  I'm comfortable giving:
> 
> Reviewed-by: Eric Blake 

Again, thanks for reviewing!



signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [Bug 1838277] Re: qemu-system-aarch64: regression in 3.1: breakpoint instructions always routed to EL_D even when current EL is higher

2019-07-31 Thread Elouan Appéré

Thanks a lot for the patch!

Just nitpicking here, but commit message and in particular wiki
changelog message (in 4.1/Planning) make it seem it was only an EL2
issue. I think it was also affecting EL3 (patch fixes both, anyway).

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1838277

Title:
  qemu-system-aarch64: regression in 3.1: breakpoint instructions always
  routed to EL_D even when current EL is higher

Status in QEMU:
  In Progress

Bug description:
  Affects 3.1.0 (latest stable release) and latest commit
  (893dc8300c80e3dc32f31e968cf7aa0904da50c3) but did *not* affect 2.11
  (qemu from bionic ubuntu LTS).

  With the following code and shell commands:

  test.s:

  .text
  mov x0, #0x6000
  msr vbar_el2, x0
  dsb sy
  isb sy

  $ aarch64-none-elf-as test.s -o test.o
  $ aarch64-none-elf-objcopy -S -O binary test.o test.bin
  $ qemu-system-aarch64 -nographic -machine virt,virtualization=on -cpu 
cortex-a57 -kernel test.bin -s -S

  vbar_el2 is still 0 after the code, instead of being the expected
  0x6000. (see screenshot).

  This regression doesn't seem to happen for vbar_el1 &
  virtualization=off.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1838277/+subscriptions

[Qemu-devel] [PATCH v4 1/1] configure: Define target access alignment in configure

2019-07-31 Thread tony.nguyen

Rename ALIGNED_ONLY to TARGET_ALIGNED_ONLY for clarity and move
defines out of target/foo/cpu.h into configure, as we do with
TARGET_WORDS_BIGENDIAN, so that it is always defined early.

Poison TARGET_ALIGNED_ONLY to prevent use in common code.

Signed-off-by: Tony Nguyen 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
---
 configure | 10 +-
 include/exec/poison.h |  1 +
 include/qom/cpu.h |  2 +-
 target/alpha/cpu.h|  2 --
 target/hppa/cpu.h |  1 -
 target/mips/cpu.h |  2 --
 target/sh4/cpu.h  |  2 --
 target/sparc/cpu.h|  2 --
 target/xtensa/cpu.h   |  2 --
 tcg/tcg.c |  2 +-
 tcg/tcg.h |  8 +---
 11 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/configure b/configure
index 714e7fb6a1..482ba0b240 100755
--- a/configure
+++ b/configure
@@ -7431,8 +7431,13 @@ for target in $target_list; do
 target_dir="$target"
 config_target_mak=$target_dir/config-target.mak
 target_name=$(echo $target | cut -d '-' -f 1)
+target_aligned_only="no"
+case "$target_name" in
+  
alpha|hppa|mips64el|mips64|mipsel|mips|mipsn32|mipsn32el|sh4|sh4eb|sparc|sparc64|sparc32plus|xtensa|xtensaeb)
+  target_aligned_only="yes"
+  ;;
+esac
 target_bigendian="no"
-
 case "$target_name" in
   
armeb|aarch64_be|hppa|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or1k|ppc|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
   target_bigendian=yes
@@ -7717,6 +7722,9 @@ fi
 if supported_whpx_target $target; then
 echo "CONFIG_WHPX=y" >> $config_target_mak
 fi
+if test "$target_aligned_only" = "yes" ; then
+  echo "TARGET_ALIGNED_ONLY=y" >> $config_target_mak
+fi
 if test "$target_bigendian" = "yes" ; then
   echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
 fi
diff --git a/include/exec/poison.h b/include/exec/poison.h
index b862320fa6..955eb863ab 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -35,6 +35,7 @@
 #pragma GCC poison TARGET_UNICORE32
 #pragma GCC poison TARGET_XTENSA
 
+#pragma GCC poison TARGET_ALIGNED_ONLY
 #pragma GCC poison TARGET_HAS_BFLT
 #pragma GCC poison TARGET_NAME
 #pragma GCC poison TARGET_SUPPORTS_MTTCG
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 5ee0046b62..9b50b73339 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -89,7 +89,7 @@ struct TranslationBlock;
  * @do_unassigned_access: Callback for unassigned access handling.
  * (this is deprecated: new targets should use do_transaction_failed instead)
  * @do_unaligned_access: Callback for unaligned access handling, if
- * the target defines #ALIGNED_ONLY.
+ * the target defines #TARGET_ALIGNED_ONLY.
  * @do_transaction_failed: Callback for handling failed memory transactions
  * (ie bus faults or external aborts; not MMU faults)
  * @virtio_is_big_endian: Callback to return %true if a CPU which supports
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
index b3e8a823e1..16eb8047cf 100644
--- a/target/alpha/cpu.h
+++ b/target/alpha/cpu.h
@@ -23,8 +23,6 @@
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
 
-#define ALIGNED_ONLY
-
 /* Alpha processors have a weak memory model */
 #define TCG_GUEST_DEFAULT_MO  (0)
 
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index aab251bc4b..2be67c289a 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -30,7 +30,6 @@
basis.  It's probably easier to fall back to a strong memory model.  */
 #define TCG_GUEST_DEFAULT_MOTCG_MO_ALL
 
-#define ALIGNED_ONLY
 #define MMU_KERNEL_IDX   0
 #define MMU_USER_IDX 3
 #define MMU_PHYS_IDX 4
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index 21c0615e02..c13cd4eb31 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -1,8 +1,6 @@
 #ifndef MIPS_CPU_H
 #define MIPS_CPU_H
 
-#define ALIGNED_ONLY
-
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
 #include "fpu/softfloat.h"
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index aee733eaaa..ecaa7a18a9 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -23,8 +23,6 @@
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
 
-#define ALIGNED_ONLY
-
 /* CPU Subtypes */
 #define SH_CPU_SH7750  (1 << 0)
 #define SH_CPU_SH7750S (1 << 1)
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
index 8ed2250cd0..1406f0ba2e 100644
--- a/target/sparc/cpu.h
+++ b/target/sparc/cpu.h
@@ -5,8 +5,6 @@
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
 
-#define ALIGNED_ONLY
-
 #if !defined(TARGET_SPARC64)
 #define TARGET_DPREGS 16
 #else
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index 2c277134f1..0459243e6b 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -32,8 +32,6 @@
 #include "exec/cpu-defs.h"
 #include "xtensa-isa.h"
 
-#define ALIGNED_ONLY
-
 /* Xtensa processors have a weak memory model */
 #define TCG_GUEST_DEFAULT_MO  (0)
 
diff --git a/tcg/tcg.c b/tcg/tcg.c
index be2c33c400..8d23fb0592 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1926,7 +1926,7 @@ static const char * const ldst_name[] =
 };
 
 static const char * const alig

Re: [Qemu-devel] [PATCH 1/3] block/backup: deal with zero detection

30.07.2019 21:40, John Snow wrote:
> 
> 
> On 7/30/19 12:32 PM, Vladimir Sementsov-Ogievskiy wrote:
>> We have detect_zeroes option, so at least for blockdev-backup user
>> should define it if zero-detection is needed. For drive-backup leave
>> detection enabled by default but do it through existing option instead
>> of open-coding.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
>> ---
>>   block/backup.c | 15 ++-
>>   blockdev.c |  8 
>>   2 files changed, 10 insertions(+), 13 deletions(-)
>>
>> diff --git a/block/backup.c b/block/backup.c
>> index 715e1d3be8..f4aaf08df3 100644
>> --- a/block/backup.c
>> +++ b/block/backup.c
>> @@ -110,7 +110,10 @@ static int coroutine_fn 
>> backup_cow_with_bounce_buffer(BackupBlockJob *job,
>>   BlockBackend *blk = job->common.blk;
>>   int nbytes;
>>   int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
>> -int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 
>> 0;
>> +int write_flags =
>> +(job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0) |
>> +(job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
>> +
>>   
>>   assert(QEMU_IS_ALIGNED(start, job->cluster_size));
>>   hbitmap_reset(job->copy_bitmap, start, job->cluster_size);
>> @@ -128,14 +131,8 @@ static int coroutine_fn 
>> backup_cow_with_bounce_buffer(BackupBlockJob *job,
>>   goto fail;
>>   }
>>   
>> -if (buffer_is_zero(*bounce_buffer, nbytes)) {
>> -ret = blk_co_pwrite_zeroes(job->target, start,
>> -   nbytes, write_flags | 
>> BDRV_REQ_MAY_UNMAP);
>> -} else {
>> -ret = blk_co_pwrite(job->target, start,
>> -nbytes, *bounce_buffer, write_flags |
>> -(job->compress ? BDRV_REQ_WRITE_COMPRESSED : 
>> 0));
>> -}
>> +ret = blk_co_pwrite(job->target, start, nbytes, *bounce_buffer,
>> +write_flags);
>>   if (ret < 0) {
>>   trace_backup_do_cow_write_fail(job, start, ret);
>>   if (error_is_read) {
>> diff --git a/blockdev.c b/blockdev.c
>> index 4d141e9a1f..a94d754504 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -3434,7 +3434,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, 
>> JobTxn *txn,
>>   BlockJob *job = NULL;
>>   BdrvDirtyBitmap *bmap = NULL;
>>   AioContext *aio_context;
>> -QDict *options = NULL;
>> +QDict *options;
>>   Error *local_err = NULL;
>>   int flags, job_flags = JOB_DEFAULT;
>>   int64_t size;
>> @@ -3529,10 +3529,10 @@ static BlockJob *do_drive_backup(DriveBackup 
>> *backup, JobTxn *txn,
>>   goto out;
>>   }
>>   
>> +options = qdict_new();
>> +qdict_put_str(options, "discard", "unmap");
>> +qdict_put_str(options, "detect-zeroes", "unmap");
>>   if (backup->format) {
>> -if (!options) {
>> -options = qdict_new();
>> -}
>>   qdict_put_str(options, "driver", backup->format);
>>   }
>>   
>>
> 
> I'm less sure of this one personally. Is it right to always try to set
> unmap on the target?
> 
> I like the idea of removing special cases and handling things more
> centrally though, but I'll want Max (or Kevin) to take a peek.
> 
> --js
> 


If nobody minds I'd agree with you to drop zero detecting from both backups.


-- 
Best regards,
Vladimir

Re: [Qemu-devel] [PATCH v3 09/33] add doc about Resettable interface




On 7/31/19 8:30 AM, David Gibson wrote:
> On Mon, Jul 29, 2019 at 04:56:30PM +0200, Damien Hedde wrote:
>> Signed-off-by: Damien Hedde 
>> ---
>>  docs/devel/reset.txt | 165 +++
>>  1 file changed, 165 insertions(+)
>>  create mode 100644 docs/devel/reset.txt
>>
>> diff --git a/docs/devel/reset.txt b/docs/devel/reset.txt
>> new file mode 100644
>> index 00..c7a1eb068f
>> --- /dev/null
>> +++ b/docs/devel/reset.txt
>> @@ -0,0 +1,165 @@
>> +
>> +=
>> +Reset
>> +=
>> +
>> +The reset of qemu objects is handled using the Resettable interface declared
>> +in *include/hw/resettable.h*.
>> +As of now DeviceClass and BusClass implement this interface.
>> +
>> +
>> +Triggering reset
>> +
>> +
>> +The function *resettable_reset* is used to trigger a reset on a given
>> +object.
>> +void resettable_reset(Object *obj, bool cold)
>> +
>> +The parameter *obj* must implement the Resettable interface.
> 
> And what happens if it doesn't?  This function has no way to report an
> error.

In the function, while retrieving the Resettable class, there is an
assert checking the obj is compatible. We could put an error argument
there to report that if that's preferable.
But then it means an error object should be given for every reset call.

> 
>> +The parameter *cold* is a boolean specifying whether to do a cold or warm
>> +reset
> 
> This doc really needs to explain the distinction between cold and warm
> reset.

ok

> 
>> +For Devices and Buses there is also the corresponding helpers:
>> +void device_reset(Device *dev, bool cold)
>> +void bus_reset(Device *dev, bool cold)
> 
> What's the semantic difference between resetting a bus and resetting
> the bridge device which owns it?

I can't speak for specific cases.
BusClass already has a reset method and qbus_reset_all is used as well
as qdev_reset_all in current code base. Currently both devices and buses
are used as reset entry point. I'm just keeping it that way.

> 
>> +If one wants to put an object into a reset state. There is the
>> +*resettable_assert_reset* function.
>> +void resettable_assert_reset(Object *obj, bool cold)
>> +
>> +One must eventually call the function *resettable_deassert_reset* to end the
>> +reset state:
>> +void resettable_deassert_reset(Object *obj, bool cold)
>> +
>> +Calling *resettable_assert_reset* then *resettable_deassert_reset* is the
>> +same as calling *resettable_reset*.
>> +
>> +It is possible to interleave multiple calls to
>> + - resettable_reset,
>> + - resettable_assert_reset, and
>> + - resettable_deassert_reset.
>> +The only constraint is that *resettable_deassert_reset* must be called once
>> +per *resettable_assert_reset* call so that the object leaves the reset 
>> state.
>> +
>> +Therefore there may be several reset sources/controllers of a given object.
>> +The interface handle everything and the controllers do not need to know
>> +anything about each others. The object will leave reset state only when all
>> +controllers released their reset.
>> +
>> +All theses functions must called while holding the iothread lock.
>> +
>> +
>> +Implementing reset for a Resettable object : Multi-phase reset
>> +--
>> +
>> +The Resettable uses a multi-phase mechanism to handle some ordering 
>> constraints
>> +when resetting multiple object at the same time. For a given object the 
>> reset
>> +procedure is split into three different phases executed in order:
>> + 1 INIT: This phase should set/reset the state of the Resettable it has 
>> when is
>> + in reset state. Side-effects to others object is forbidden (such as
>> + setting IO level).
>> + 2 HOLD: This phase corresponds to the external side-effects due to staying 
>> into
>> + the reset state.
>> + 3 EXIT: This phase corresponds to leaving the reset state. It have both
>> + local and external effects.
>> +
>> +*resettable_assert_reset* does the INIT and HOLD phases. While
>> +*resettable_deassert_reset* does the EXIT phase.
>> +
>> +When resetting multiple object at the same time. The interface executes the
>> +given phase of the objects before going to the next phase. This guarantee 
>> that
>> +all INIT phases are done before any HOLD phase and so on.
>> +
>> +There is three methods in the interface so must be implemented in an object.
>> +The methods corresponds to the three phases:
>> +```
>> +typedef void (*ResettableInitPhase)(Object *obj);
>> +typedef void (*ResettableHoldPhase)(Object *obj);
>> +typedef void (*ResettableExitPhase)(Object *obj);
>> +typedef struct ResettableClass {
>> +InterfaceClass parent_class;
>> +
>> +struct ResettablePhases {
>> +ResettableInitPhase init;
>> +ResettableHoldPhase hold;
>> +ResettableExitPhase exit;
>> +} phases;
>> +[...]
>> +} ResettableClass;
>> +```
>> +
>> +Theses methods should be updated when specializing an object. For this the
>> +

Re: [Qemu-devel] [PATCH v3 08/33] Add function to control reset with gpio inputs




On 7/31/19 8:11 AM, David Gibson wrote:
> On Mon, Jul 29, 2019 at 04:56:29PM +0200, Damien Hedde wrote:
>> It adds the possibility to add 2 gpios to control the warm and cold reset.
>> With theses ios, the reset can be maintained during some time.
>> Each io is associated with a state to detect level changes.
>>
>> Vmstate subsections are also added to the existsing device_reset
>> subsection.
> 
> This doesn't seem like a thing that should be present on every single
> DeviceState.

I can revert to previous version where the io state has to be explicitly
added in devices using it.

Damien

Re: [Qemu-devel] [PATCH v3 01/33] Create Resettable QOM interface

2019-07-31 Thread Christophe de Dinechin



Peter Maydell writes:

> On Tue, 30 Jul 2019 at 14:56, Cornelia Huck  wrote:
>>
>> On Tue, 30 Jul 2019 14:44:21 +0100
>> Peter Maydell  wrote:
>>
>> > On Tue, 30 Jul 2019 at 14:42, Cornelia Huck  wrote:
>> > > I'm having a hard time figuring out what a 'cold' or a 'warm' reset is
>> > > supposed to be... can you add a definition/guideline somewhere?
>> >
>> > Generally "cold" reset is "power on" and "warm" is "we were already
>> > powered-on, but somebody flipped a reset line somewhere".
>>
>> Ok, that makes sense... my main concern is to distinguish that in a
>> generic way, as it is a generic interface. What about adding something
>> like:
>>
>> "A 'cold' reset means that the object to be reset is initially reset; a 
>> 'warm'
>> reset means that the object to be reset has already been initialized."
>>
>> Or is that again too generic?
>
> I think it doesn't quite capture the idea -- an object can have already
> been reset and then get a 'cold' reset: this is like having a powered-on
> machine and then power-cycling it.
>
> The 'warm' reset is the vaguer one, because the specific behaviour
> is somewhat device-dependent (many devices might not have any
> difference from 'cold' reset, for those that do the exact detail
> of what doesn't get reset on warm-reset will vary). But every
> device should have some kind of "as if you power-cycled it" (or
> for QEMU, "go back to the same state as if you just started QEMU on the
> command line"). Our current "reset" method is really cold-reset.

Is there any concept of locality associated with warm reset?
For example, you'd expect a cold reset to happen on the whole system,
but I guess a warm reset could be restricted to a single bus.

The documentation should give examples of how warm reset could be
triggered, and what it could do differently from cold reset.

>
> thanks
> -- PMM


--
Cheers,
Christophe de Dinechin (IRC c3d)

Re: [Qemu-devel] [PATCH v8 16/16] block/io_uring: adds fd registration

2019-07-31 Thread Stefan Hajnoczi

On Tue, Jul 30, 2019 at 11:04:41PM +0530, Aarushi Mehta wrote:

I'm concerned about file descriptor leaks.  fd_array[] keeps file
descriptors basically forever, even after the file is no longer in use
by the rest of QEMU.  There needs to be a call to unregister whenever a
file is closed elsewhere in QEMU.  For benchmarking and experimentation
the current code is okay, but for production usage the leak must be
prevented.

> +/**
> + * luring_fd_register:
> + *
> + * Register and unregisters file descriptors, see luring_fd_lookup
> + */
> +static int luring_fd_register(struct io_uring *ring, LuringFd *fd_reg, int 
> fd)
> +{
> +int ret, nr;
> +GHashTable *lookup = fd_reg->fd_lookup;
> +nr = g_hash_table_size(lookup);
> +
> +/* Unregister */
> +if (!fd) {
> +ret = io_uring_unregister_files(ring);
> +g_hash_table_remove_all(lookup);

Is it correct to clear the hash table if there was an error?

> +return ret;
> +}

Please make unregistering all files a separate function.  It's not
necessary to overload this function since this is a completely separate
operation.

> +
> +/* If adding new, API requires older registrations to be removed */
> +if (nr) {
> +io_uring_unregister_files(ring);
> +}
> +
> +fd_reg->fd_array = g_realloc_n(fd_reg->fd_array, nr + 1, sizeof(int));
> +fd_reg->fd_array[nr] = fd;
> +fd_reg->fd_index = g_realloc_n(fd_reg->fd_index, nr + 1, sizeof(int));
> +fd_reg->fd_index[nr] = nr;
> +
> +g_hash_table_insert(lookup, &fd_reg->fd_array[nr], 
> &fd_reg->fd_index[nr]);

fd_index[] is not necessary, you can cast nr to a gpointer instead to
store the data directly inside GHashTable:

  g_hash_table_insert(lookup, &fd_reg->fd_array[nr],
  GINT_TO_POINTER(nr));

The hash table accesses can be made slightly more efficient by avoiding
the pointer dereference for keys as well:

  g_hash_table_insert(lookup, GINT_TO_POINTER(fd),
  GINT_TO_POINTER(nr));

In this case fd_array[] is only used for the io_uring_register_files()
call and nothing else.  Remember to switch to g_direct_equal() and
g_direct_hash() in g_hash_table_new_full() if you make the key a direct
gpointer.

> +trace_luring_fd_register(fd, nr);
> +return io_uring_register_files(ring, fd_reg->fd_array, nr + 1);
> +}
> +
> +/**
> + * luring_fd_lookup:
> + *
> + * Used to lookup fd index in registered array at submission time
> + * If the lookup table has not been created or the fd is not in the table,
> + * the fd is registered.
> + *
> + * If registration errors, the hash is cleared and the fd used directly
> + *
> + * Unregistering is done at luring_detach_aio_context
> + */
> +static int luring_fd_lookup(LuringState *s, int fd)
> +{
> +int *index, ret;
> +if (!s->fd_reg.fd_lookup) {
> +s->fd_reg.fd_lookup = g_hash_table_new_full(g_int_hash, g_int_equal,
> +g_free, g_free);

fd_array[] and fd_index[] are allocated in single allocations for the
entire array, therefore g_free(key) and g_free(value) on individual
elements is undefined behavior and could crash the program.  There
should be no destroy function for them.

Missing g_hash_table_unref() to free fd_lookup.

> +luring_fd_register(&s->ring, &s->fd_reg, fd);
> +}
> +index = g_hash_table_lookup(s->fd_reg.fd_lookup, &fd);
> +
> +if (!index) {
> +ret = luring_fd_register(&s->ring, &s->fd_reg, fd);
> +if (ret < 0) {
> +g_hash_table_remove_all(s->fd_reg.fd_lookup);

Why is the hash table cleared and why are fd_array[]/fd_index[] left
behind?

> +return ret;
> +}
> +index = g_hash_table_lookup(s->fd_reg.fd_lookup, &fd);
> +}
> +return *index;
> +}

What are the concerns about in-flight requests and how are they
addressed?  For example, if a request is in-flight and another request
wants to add a new fd then io_uring_unregister_files() and
io_uring_register_files() are called while a request is still in-flight.
How does the io_uring kernel code handle this?

signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v3] blockjob: drain all job nodes in block_job_drain

30.07.2019 22:11, John Snow wrote:
> 
> 
> On 7/24/19 5:40 AM, Vladimir Sementsov-Ogievskiy wrote:
>> Instead of draining additional nodes in each job code, let's do it in
>> common block_job_drain, draining just all job's children.
>> BlockJobDriver.drain becomes unused, so, drop it at all.
>>
>> It's also a first step to finally get rid of blockjob->blk.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
>> ---
>>
>> v3: just resend, as I've some auto returned mails and not sure that
>>  v2 reached recipients.
>>
>> v2: apply Max's suggestions:
>>   - drop BlockJobDriver.drain
>>   - do firtly loop of bdrv_drained_begin and then separate loop
>> of bdrv_drained_end.
>>
>> Hmm, a question here: should I call bdrv_drained_end in reverse
>> order? Or it's OK as is?
>>
> 
> I think it should be OK. These nodes don't necessarily have a well
> defined relationship between each other, do they?
> 
>>   include/block/blockjob_int.h | 11 ---
>>   block/backup.c   | 18 +-
>>   block/mirror.c   | 26 +++---
>>   blockjob.c   | 13 -
>>   4 files changed, 12 insertions(+), 56 deletions(-)
>>
> 
> Nice diffstat :)
> 
>> diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
>> index e4a318dd15..e1abf4ee85 100644
>> --- a/include/block/blockjob_int.h
>> +++ b/include/block/blockjob_int.h
>> @@ -52,17 +52,6 @@ struct BlockJobDriver {
>>* besides job->blk to the new AioContext.
>>*/
>>   void (*attached_aio_context)(BlockJob *job, AioContext *new_context);
>> -
>> -/*
>> - * If the callback is not NULL, it will be invoked when the job has to 
>> be
>> - * synchronously cancelled or completed; it should drain 
>> BlockDriverStates
>> - * as required to ensure progress.
>> - *
>> - * Block jobs must use the default implementation for job_driver.drain,
>> - * which will in turn call this callback after doing generic block job
>> - * stuff.
>> - */
>> -void (*drain)(BlockJob *job);
> 
> I was about to say "huh?" ... but then realized you're deleting this
> confusing glob. Good.
> 
>>   };
>>   
>>   /**
>> diff --git a/block/backup.c b/block/backup.c
>> index 715e1d3be8..7930004bbd 100644
>> --- a/block/backup.c
>> +++ b/block/backup.c
>> @@ -320,21 +320,6 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
>>   hbitmap_set(backup_job->copy_bitmap, 0, backup_job->len);
>>   }
>>   
>> -static void backup_drain(BlockJob *job)
>> -{
>> -BackupBlockJob *s = container_of(job, BackupBlockJob, common);
>> -
>> -/* Need to keep a reference in case blk_drain triggers execution
>> - * of backup_complete...
>> - */
>> -if (s->target) {
>> -BlockBackend *target = s->target;
>> -blk_ref(target);
>> -blk_drain(target);
>> -blk_unref(target);
>> -}
>> -}
>> -
> 
> Adios ...
> 
>>   static BlockErrorAction backup_error_action(BackupBlockJob *job,
>>   bool read, int error)
>>   {
>> @@ -493,8 +478,7 @@ static const BlockJobDriver backup_job_driver = {
>>   .commit = backup_commit,
>>   .abort  = backup_abort,
>>   .clean  = backup_clean,
>> -},
>> -.drain  = backup_drain,
>> +}
>>   };
>>   
> 
> This pleases the eyes.
> 
>>   static int64_t backup_calculate_cluster_size(BlockDriverState *target,
>> diff --git a/block/mirror.c b/block/mirror.c
>> index 8cb75fb409..8456ccd89d 100644
>> --- a/block/mirror.c
>> +++ b/block/mirror.c
>> @@ -644,14 +644,11 @@ static int mirror_exit_common(Job *job)
>>   bdrv_ref(mirror_top_bs);
>>   bdrv_ref(target_bs);
>>   
>> -/* Remove target parent that still uses BLK_PERM_WRITE/RESIZE before
>> +/*
>> + * Remove target parent that still uses BLK_PERM_WRITE/RESIZE before
> 
> (Thanks, patchew...)
> 
>>* inserting target_bs at s->to_replace, where we might not be able to 
>> get
>>* these permissions.
>> - *
>> - * Note that blk_unref() alone doesn't necessarily drop permissions 
>> because
>> - * we might be running nested inside mirror_drain(), which takes an 
>> extra
>> - * reference, so use an explicit blk_set_perm() first. */
>> -blk_set_perm(s->target, 0, BLK_PERM_ALL, &error_abort);
>> + */
>>   blk_unref(s->target);
>>   s->target = NULL;
>>   
>> @@ -1143,21 +1140,6 @@ static bool mirror_drained_poll(BlockJob *job)
>>   return !!s->in_flight;
>>   }
>>   
>> -static void mirror_drain(BlockJob *job)
>> -{
>> -MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
>> -
>> -/* Need to keep a reference in case blk_drain triggers execution
>> - * of mirror_complete...
>> - */
>> -if (s->target) {
>> -BlockBackend *target = s->target;
>> -blk_ref(target);
>> -blk_drain(target);
>> -bl

Re: [Qemu-devel] [PATCH 0/3] backup fixes for 4.1?

30.07.2019 21:41, John Snow wrote:
> 
> 
> On 7/30/19 12:32 PM, Vladimir Sementsov-Ogievskiy wrote:
>> Hi all!
>>
>> Here are two small fixes.
>>
>> 01 is not a degradation at all, so it's OK for 4.2
>> 02 is degradation of 3.0, so it's possibly OK for 4.2 too,
>> but it seems to be real bug and fix is very simple, so,
>> may be 4.1 is better
>>
>> Or you may take the whole series to 4.1 if you want.
>>
> 
> I think (1) and (2) can go in for stable after review, but they're not
> crucial for 4.1 especially at this late of a stage. Should be cataclysms
> only right now.
> 
> --js
> 

I can rebase it then on your bitmaps branch. Or, if we want it for stable,
maybe I shouldn't?

-- 
Best regards,
Vladimir

Re: [Qemu-devel] When to use qemu/typedefs.h

2019-07-31 Thread Peter Maydell

On Wed, 31 Jul 2019 at 09:40, Thomas Huth  wrote:
> IMHO we should get rid of mandating typedefs. They are causing too much
> trouble - e.g. do you also remember the issues with duplicated typedefs
> in certain compiler versions in the past? (these should be hopefully
> gone now, but still...)
>
> And many QEMU developers are also working on the Linux kernel, which
> rather forbids typedefs. Having to switch your mind back and forth
> whether to use typedefs or not is really annoying.

I would rather keep typedefs -- it's one of the style issues we're
reasonably consistent with. QEMU isn't the kernel, and its style
is not the same on many points. If we switch to "use 'struct Foo'"
we'll have a codebase which becomes rapidly very inconsistent
about whether we use 'struct' or not.

thanks
-- PMM

Re: [Qemu-devel] When to use qemu/typedefs.h

2019-07-31 Thread Daniel P . Berrangé

On Wed, Jul 31, 2019 at 11:45:41AM +0100, Peter Maydell wrote:
> On Wed, 31 Jul 2019 at 09:40, Thomas Huth  wrote:
> > IMHO we should get rid of mandating typedefs. They are causing too much
> > trouble - e.g. do you also remember the issues with duplicated typedefs
> > in certain compiler versions in the past? (these should be hopefully
> > gone now, but still...)
> >
> > And many QEMU developers are also working on the Linux kernel, which
> > rather forbids typedefs. Having to switch your mind back and forth
> > whether to use typedefs or not is really annoying.
> 
> I would rather keep typedefs -- it's one of the style issues we're
> reasonably consistent with. QEMU isn't the kernel, and its style
> is not the same on many points. If we switch to "use 'struct Foo'"
> we'll have a codebase which becomes rapidly very inconsistent
> about whether we use 'struct' or not.

I tend to agree - while people may work on kernel code, plenty do not
work on kernel code & QEMU is not following kernel code practices more
generally. I think it is more compelling to align with glib given that
it is a core part of QEMU codebase. I'd much rather QEMU more closely
align with glib and increasingly drop stuff that QEMU has reinvented
in favour of using GLib features. For example I could see GObject as
a  base for QOM in future, and typedefs are a normal practice in this
case.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-devel] [PATCH 05/28] queue: Drop superfluous #include qemu/atomic.h

On 26/07/2019 14.05, Markus Armbruster wrote:
> When commit 5f7d05ecfda added QLIST_INSERT_HEAD_RCU() to qemu/queue.h,
> it had to include qemu/atomic.h.  Commit 341774fe6cc removed
> QLIST_INSERT_HEAD_RCU() again, but neglected to remove the #include.
> Do that now.
> 
> Signed-off-by: Markus Armbruster 
> ---
>  include/qemu/queue.h | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/include/qemu/queue.h b/include/qemu/queue.h
> index 0379bd8fdb..73bf4a984d 100644
> --- a/include/qemu/queue.h
> +++ b/include/qemu/queue.h
> @@ -78,8 +78,6 @@
>   * For details on the use of these macros, see the queue(3) manual page.
>   */
>  
> -#include "qemu/atomic.h" /* for smp_wmb() */
> -
>  /*
>   * List definitions.
>   */
> 

Reviewed-by: Thomas Huth

Re: [Qemu-devel] [PATCH v4 0/4] delete created files when block_crypto_co_create_opts_luks fails

2019-07-31 Thread Daniel Henrique Barboza


Ping

On 6/28/19 4:45 PM, Daniel Henrique Barboza wrote:

Changes from previous version [1]:
- added an extra patch including a new qemu-iotest to exercise the fix


[1] https://lists.gnu.org/archive/html/qemu-devel/2019-03/msg07159.html

Daniel Henrique Barboza (4):
   block: introducing 'bdrv_co_delete_file' interface
   block.c: adding bdrv_delete_file
   crypto.c: cleanup created file when block_crypto_co_create_opts_luks
 fails
   qemu-iotests: adding LUKS cleanup for non-UTF8 secret error

  block.c| 82 ++
  block/crypto.c | 31 ++
  block/file-posix.c | 28 +
  include/block/block.h  |  3 ++
  include/block/block_int.h  |  6 +++
  tests/qemu-iotests/257 | 67 +++
  tests/qemu-iotests/257.out | 11 +
  tests/qemu-iotests/group   |  1 +
  8 files changed, 229 insertions(+)
  create mode 100755 tests/qemu-iotests/257
  create mode 100644 tests/qemu-iotests/257.out

Re: [Qemu-devel] [RFC] virtio-mmio: implement modern (v2) personality (virtio-1)

2019-07-31 Thread Sergio Lopez


Andrea Bolognani  writes:

> On Mon, 2019-07-29 at 14:57 +0200, Sergio Lopez wrote:
> [...]
>>  /* virtio-mmio device */
>>  
>>  static Property virtio_mmio_properties[] = {
>>  DEFINE_PROP_BOOL("format_transport_address", VirtIOMMIOProxy,
>>   format_transport_address, true),
>> +DEFINE_PROP_BOOL("modern", VirtIOMMIOProxy, modern, false),
>>  DEFINE_PROP_END_OF_LIST(),
>>  };
>
> Not a QEMU developer so forgive me if I say something silly, but IIUC
> you'd be able to opt into the new feature by using eg.
>
>   -device virtio-net-device,modern=on
>
> However, virtio-pci devices already have a mechanism to control the
> VirtIO protocol version, where you use
>
>   -device virtio-net-pci,disable-modern=no,disable-legacy=yes
>
> to get a VirtIO 1.x-only device and
>
>   -device virtio-net-pci,disable-modern=no,disable-legacy=no
>
> for a transitional device.
>
> Can you please make sure virtio-mmio uses the existing interface
> instead of introducing a new one?

The problem here is that virtio-pci devices register a specific type
for each kind of supported device (virtio-net-pci, virtio-blk-pci...),
while virtio-mmio doesn't. This saves a lot of boilerplate, but also
implies that bus properties can't be passed through the attached device
(virtio-blk-device can't carry properties for its virtio-mmio parent
bus).

Sergio.


signature.asc
Description: PGP signature

Re: [Qemu-devel] [RFC] virtio-mmio: implement modern (v2) personality (virtio-1)

2019-07-31 Thread Sergio Lopez


Michael S. Tsirkin  writes:

> On Tue, Jul 30, 2019 at 03:14:00PM +0200, Cornelia Huck wrote:
>> On Tue, 30 Jul 2019 14:17:48 +0200
>> Andrea Bolognani  wrote:
>> 
>> > On Tue, 2019-07-30 at 13:35 +0200, Cornelia Huck wrote:
>> > > On Tue, 30 Jul 2019 12:25:30 +0200
>> > > Andrea Bolognani  wrote:  
>> > > > Can you please make sure virtio-mmio uses the existing interface
>> > > > instead of introducing a new one?  
>> > > 
>> > > FWIW, I really hate virtio-pci's disable-modern/disable-legacy... for a
>> > > starter, what is 'modern'? Will we have 'ultra-modern' in the future?  
>> > 
>> > AIUI the modern/legacy terminology is part of the VirtIO spec, so
>> > while I agree that it's not necessarily the least prone to ambiguity
>> > at least it's well defined.
>> 
>> Legacy is, modern isn't :) Devices/drivers are conforming to the
>> standard, I don't think there's a special term for that.
>
> Right, if we followed the spec, disable-modern would have been
> force-legacy.
>
> I'm fine with adding force-legacy for everyone and asking tools to
> transition if there. Document it's same as disable-modern for pci.
> Cornelia?

FWIW, for this patch, I'm perfectly fine with changing the "modern"
property to "force-legacy", with "true" as the default value.

>> > 
>> > > It is also quite backwards with the 'disable' terminology.  
>> > 
>> > That's also true. I never claimed the way virtio-pci does it is
>> > perfect!
>> > 
>> > > We also have a different mechanism for virtio-ccw ('max_revision',
>> > > which covers a bit more than virtio-1; it doesn't have a 'min_revision',
>> > > as negotiating the revision down is fine), so I don't see why
>> > > virtio-mmio should replicate the virtio-pci mechanism.
>> > > 
>> > > Also, IIUC, virtio-mmio does not have transitional devices, but either
>> > > version 1 (legacy) or version 2 (virtio-1). It probably makes more
>> > > sense to expose the device version instead; either as an exact version
>> > > (especially if it isn't supposed to go up without incompatible
>> > > changes), or with some min/max concept (where version 1 would stand a
>> > > bit alone, so that would probably be a bit awkward.)  
>> > 
>> > I think that if reinventing the wheel is generally agreed not to be
>> > a good idea, then it stands to reason that reinventing it twice can
>> > only be described as absolute madness :)
>> > 
>> > We should have a single way to control the VirtIO protocol version
>> > that works for all VirtIO devices, regardless of transport. We might
>> > even want to have virtio-*-{device,ccw}-non-transitional to mirror
>> > the existing virtio-*-pci-non-transitional.
>> > 
>> > FWIW, libvirt already implements support for (non)-transitional
>> > virtio-pci devices using either the dedicated devices or the base
>> > virtio-pci plus the disable-{modern,legacy} attributes.
>> 
>> One problem (besides my dislike of the existing virtio-pci
>> interfaces :) is that pci, ccw, and mmio all have slightly different
>> semantics.
>> 
>> - pci: If we need to keep legacy support around, we cannot enable some
>>   features (IIRC, pci-e, maybe others as well.) That means transitional
>>   devices are in some ways inferior to virtio-1 only devices, so it
>>   makes a lot of sense to be able to configure devices without legacy
>>   support. The differences between legacy and virtio-1 are quite large.
>> - ccw: Has revisions negotiated between device and driver; virtio-1
>>   requires revision 1 or higher. (Legacy drivers that don't know the
>>   concept of revisions automatically get revision 0.) Differences
>>   between legacy and virtio-1 are mostly virtqueue endianness and some
>>   control structures.
>> - mmio: Has device versions offered by the device, the driver can take
>>   it or leave it. No transitional devices. Differences don't look as
>>   large as the ones for pci, either.
>> 
>> So, if we were to duplicate the same scheme as for pci for ccw and mmio
>> as well, we'd get
>> 
>> - ccw: devices that support revision 0 only (disable-modern), that act
>>   as today, or that support at least revision 1 (disable-legacy). We
>>   still need to keep max_revision around for backwards compatibility.
>>   Legacy only makes sense for compat machines (although this is
>>   equivalent to max_revision 0); I don't see a reason why you would
>>   want virtio-1 only devices, unless you'd want to rip out legacy
>>   support in QEMU completely.
>
> Reduce security attack surface slightly. Save some cycles
> (down the road) on branches in the endian-ness handling.
> Make sure your guests
> are all up to date in preparation to the day when legacy will go away.
>
> Not a huge win, for sure, but hey - it's something.
>
>> - mmio: devices that support version 1 (disable-modern), or version 2
>>   (disable-legacy). You cannot have both at the same time. Whether this
>>   makes sense depends on whether there will be a version 3 in the
>>   future.
>> 
>> So, this might make some sense for mmio; for ccw, I d

Re: [Qemu-devel] [PATCH v3 06/33] add the vmstate description for device reset state




On 7/31/19 8:08 AM, David Gibson wrote:
> On Mon, Jul 29, 2019 at 04:56:27PM +0200, Damien Hedde wrote:
>> It contains the resetting counter and cold flag status.
>>
>> At this point, migration of bus reset related state (counter and cold/warm
>> flag) is handled by parent device. This done using the post_load
>> function in the vmsd subsection.
>>
>> This is last point allow to add an initial support of migration with part of
>> qdev/qbus tree in reset state under the following condition:
>> + time-lasting reset are asserted on Device only
>>
>> Note that if this condition is not respected, migration will succeed and
>> no failure will occurs. The only impact is that the resetting counter
>> of a bus may lower afer a migration.
>>
>> Signed-off-by: Damien Hedde 
>> ---
>>  hw/core/Makefile.objs  |  1 +
>>  hw/core/qdev-vmstate.c | 45 ++
>>  2 files changed, 46 insertions(+)
>>  create mode 100644 hw/core/qdev-vmstate.c
>>
>> diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
>> index d9234aa98a..49e9be0228 100644
>> --- a/hw/core/Makefile.objs
>> +++ b/hw/core/Makefile.objs
>> @@ -4,6 +4,7 @@ common-obj-y += bus.o reset.o
>>  common-obj-y += resettable.o
>>  common-obj-$(CONFIG_SOFTMMU) += qdev-fw.o
>>  common-obj-$(CONFIG_SOFTMMU) += fw-path-provider.o
>> +common-obj-$(CONFIG_SOFTMMU) += qdev-vmstate.o
>>  # irq.o needed for qdev GPIO handling:
>>  common-obj-y += irq.o
>>  common-obj-y += hotplug.o
>> diff --git a/hw/core/qdev-vmstate.c b/hw/core/qdev-vmstate.c
>> new file mode 100644
>> index 00..07b010811f
>> --- /dev/null
>> +++ b/hw/core/qdev-vmstate.c
>> @@ -0,0 +1,45 @@
>> +/*
>> + * Device vmstate
>> + *
>> + * Copyright (c) 2019 GreenSocs
>> + *
>> + * Authors:
>> + *   Damien Hedde
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "hw/qdev.h"
>> +#include "migration/vmstate.h"
>> +
>> +static bool device_vmstate_reset_needed(void *opaque)
>> +{
>> +DeviceState *dev = (DeviceState *) opaque;
>> +return dev->resetting != 0;
>> +}
>> +
>> +static int device_vmstate_reset_post_load(void *opaque, int version_id)
>> +{
>> +DeviceState *dev = (DeviceState *) opaque;
>> +BusState *bus;
>> +QLIST_FOREACH(bus, &dev->child_bus, sibling) {
>> +bus->resetting = dev->resetting;
> 
> Having redundant copies of the resetting bit in the bridge and every
> bus instance seems kind of bogus.

Currently we duplicate the resetting bit of the parent into its children when
we do the reset propagation into the tree. It means the resetting count of a
device/bus contains the value of its parent plus any additional bit
local to the object (due to a reset from a GPIO, for example).

I'm not sure if we can avoid that. It would require the
"get_resetting_count" somehow to be recursive and fetch parent value and
so on. I need to work on it to know if it's really possible.

> 
>> +bus->reset_is_cold = dev->reset_is_cold;
>> +}
>> +return 0;
>> +}
>> +
>> +const struct VMStateDescription device_vmstate_reset = {
>> +.name = "device_reset",
>> +.version_id = 0,
>> +.minimum_version_id = 0,
>> +.needed = device_vmstate_reset_needed,
>> +.post_load = device_vmstate_reset_post_load,
>> +.fields = (VMStateField[]) {
>> +VMSTATE_UINT32(resetting, DeviceState),
>> +VMSTATE_BOOL(reset_is_cold, DeviceState),
>> +VMSTATE_END_OF_LIST()
>> +},
>> +};
>

Re: [Qemu-devel] [PATCH] ehci: Ensure that device is not NULL before calling usb_ep_get

On 7/30/19 7:45 PM, Guenter Roeck wrote:
> The following assert is seen once in a while while resetting the
> Linux kernel.
> 
> qemu-system-x86_64: hw/usb/core.c:734: usb_ep_get:
>   Assertion `dev != NULL' failed.
> 
> The call to usb_ep_get() originates from ehci_execute().
> Analysis and debugging shows that p->queue->dev can indeed be NULL
> in this function. Add check for this condition and return an error
> if it is seen.

Your patch is not wrong as it corrects your case, but I wonder why we
get there. This assert seems to have caught a bug.

Gerd, shouldn't we call usb_packet_cleanup() in ehci_reset() rather than
ehci_finalize()? Then we shouldn't need this patch.

> Signed-off-by: Guenter Roeck 
> ---
>  hw/usb/hcd-ehci.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
> index 62dab05..c759f3e 100644
> --- a/hw/usb/hcd-ehci.c
> +++ b/hw/usb/hcd-ehci.c
> @@ -1348,6 +1348,11 @@ static int ehci_execute(EHCIPacket *p, const char 
> *action)
>  return -1;
>  }
>  
> +if (p->queue->dev == NULL) {
> +ehci_trace_guest_bug(p->queue->ehci, "No device attached to 
> queue\n");
> +return -1;
> +}
> +
>  if (get_field(p->qtd.token, QTD_TOKEN_TBYTES) > BUFF_SIZE) {
>  ehci_trace_guest_bug(p->queue->ehci,
>   "guest requested more bytes than allowed");
>

Re: [Qemu-devel] [PATCH v3 05/33] Switch to new api in qdev/bus

On 7/31/19 11:29 AM, Damien Hedde wrote:
> On 7/31/19 8:05 AM, David Gibson wrote:
>> On Mon, Jul 29, 2019 at 04:56:26PM +0200, Damien Hedde wrote:
>>> Deprecate old reset apis and make them use the new one while they
>>> are still used somewhere.
>>>
>>> Signed-off-by: Damien Hedde 
>>> ---
>>>  hw/core/qdev.c | 22 +++---
>>>  include/hw/qdev-core.h | 28 ++--
>>>  2 files changed, 25 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/hw/core/qdev.c b/hw/core/qdev.c
>>> index 559ced070d..e9e5f2d5f9 100644
>>> --- a/hw/core/qdev.c
>>> +++ b/hw/core/qdev.c
>>> @@ -312,25 +312,9 @@ static void device_foreach_reset_child(Object *obj, 
>>> void (*func)(Object *))
>>>  }
>>>  }
>>>  
>>> -static int qdev_reset_one(DeviceState *dev, void *opaque)
>>> -{
>>> -device_legacy_reset(dev);
>>> -
>>> -return 0;
>>> -}
>>> -
>>> -static int qbus_reset_one(BusState *bus, void *opaque)
>>> -{
>>> -BusClass *bc = BUS_GET_CLASS(bus);
>>> -if (bc->reset) {
>>> -bc->reset(bus);
>>> -}
>>> -return 0;
>>> -}
>>> -
>>>  void qdev_reset_all(DeviceState *dev)
>>>  {
>>> -qdev_walk_children(dev, NULL, NULL, qdev_reset_one, qbus_reset_one, 
>>> NULL);
>>> +device_reset(dev, false);
>>>  }
>>>  
>>>  void qdev_reset_all_fn(void *opaque)
>>> @@ -340,7 +324,7 @@ void qdev_reset_all_fn(void *opaque)
>>>  
>>>  void qbus_reset_all(BusState *bus)
>>>  {
>>> -qbus_walk_children(bus, NULL, NULL, qdev_reset_one, qbus_reset_one, 
>>> NULL);
>>> +bus_reset(bus, false);
>>>  }
>>>  
>>>  void qbus_reset_all_fn(void *opaque)
>>> @@ -922,7 +906,7 @@ static void device_set_realized(Object *obj, bool 
>>> value, Error **errp)
>>>  }
>>>  }
>>>  if (dev->hotplugged) {
>>> -device_legacy_reset(dev);
>>> +device_reset(dev, true);
>>
>> So.. is this change in the device_reset() signature really necessary?
>> Even if there are compelling reasons to handle warm reset in the new
>> API, that doesn't mean you need to change device_reset() itself from
>> its established meaning of a cold (i.e. as per power cycle) reset.
>> Warm resets are generally called in rather more specific circumstances
>> (often under guest software direction) so it seems likely that users
>> would want to engage with the new reset API directly.  Or we could
>> just create a device_warm_reset() wrapper.  That would also avoid the
>> bare boolean parameter, which is not great for readability (you have
>> to look up the signature to have any idea what it means).

If the boolean is not meaningful, we can use an enum...

> I've added device_reset_cold/warm wrapper functions to avoid having to
> pass the boolean parameter. it seems I forgot to use them in qdev.c
> I suppose, like you said, we could live with
> + no function with the boolean parameter
> + device_reset doing cold reset
> + device_reset_warm (or device_warm_reset) for the warm version
> 
> Damien
>

Re: [Qemu-devel] [PATCH v5] net: tap: replace snprintf with g_strdup_printf calls

2019-07-31 Thread Markus Armbruster

P J P  writes:

> From: Prasad J Pandit 
>
> When invoking qemu-bridge-helper in 'net_bridge_run_helper',
> instead of using fixed sized buffers, use dynamically allocated
> ones initialised and returned by g_strdup_printf().

Does this fix a bug?

> If bridge name 'br_buf' is undefined, pass empty string ("") to
> g_strdup_printf() in its place, to avoid printing "(null)" string.
>
> Signed-off-by: Prasad J Pandit 
> ---
>  net/tap.c | 19 +++
>  1 file changed, 11 insertions(+), 8 deletions(-)
>
> Update v5: add commit message about conditional 'br_buf' argument
>   -> https://lists.gnu.org/archive/html/qemu-devel/2019-07/msg06397.html
>
> diff --git a/net/tap.c b/net/tap.c
> index e8aadd8d4b..fc38029f41 100644
> --- a/net/tap.c
> +++ b/net/tap.c
> @@ -498,9 +498,9 @@ static int net_bridge_run_helper(const char *helper, 
> const char *bridge,
>  }
>  if (pid == 0) {
>  int open_max = sysconf(_SC_OPEN_MAX), i;
> -char fd_buf[6+10];
> -char br_buf[6+IFNAMSIZ] = {0};
> -char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15];
> +char *fd_buf = NULL;

Dead initializer.

> +char *br_buf = NULL;
> +char *helper_cmd = NULL;

Another one.

>  
>  for (i = 3; i < open_max; i++) {
>  if (i != sv[1]) {
> @@ -508,17 +508,17 @@ static int net_bridge_run_helper(const char *helper, 
> const char *bridge,
>  }
>  }
>  
> -snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]);
> +fd_buf = g_strdup_printf("%s%d", "--fd=", sv[1]);

Good opportunity to change this to

   fd_buf = g_strdup_printf("--fd=%d", sv[1]);

More of the same below.

>  
>  if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
>  /* assume helper is a command */
>  
>  if (strstr(helper, "--br=") == NULL) {
> -snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
> +br_buf = g_strdup_printf("%s%s", "--br=", bridge);
>  }
>  
> -snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s",
> - helper, "--use-vnet", fd_buf, br_buf);
> +helper_cmd = g_strdup_printf("%s %s %s %s", helper,
> +"--use-vnet", fd_buf, br_buf ? br_buf : "");
>  
>  parg = args;
>  *parg++ = (char *)"sh";
> @@ -527,10 +527,11 @@ static int net_bridge_run_helper(const char *helper, 
> const char *bridge,
>  *parg++ = NULL;
>  
>  execv("/bin/sh", args);
> +g_free(helper_cmd);
>  } else {
>  /* assume helper is just the executable path name */
>  
> -snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
> +br_buf = g_strdup_printf("%s%s", "--br=", bridge);
>  
>  parg = args;
>  *parg++ = (char *)helper;
> @@ -541,6 +542,8 @@ static int net_bridge_run_helper(const char *helper, 
> const char *bridge,
>  
>  execv(helper, args);
>  }
> +g_free(fd_buf);
> +g_free(br_buf);
>  _exit(1);
>  
>  } else {

The commit does what it claims to do, and no more, so
Reviewed-by: Markus Armbruster 

However, the code is still rather ugly, and I'd be tempted to use the
opportunity to clean up some more.  Untested sketch:

diff --git a/net/tap.c b/net/tap.c
index 06af8fb8ad..57bb4c552d 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -402,8 +402,7 @@ static void launch_script(const char *setup_script, const 
char *ifname,
   int fd, Error **errp)
 {
 int pid, status;
-char *args[3];
-char **parg;
+const char *argv[3];
 
 /* try to launch network script */
 pid = fork();
@@ -413,18 +412,18 @@ static void launch_script(const char *setup_script, const 
char *ifname,
 return;
 }
 if (pid == 0) {
-int open_max = sysconf(_SC_OPEN_MAX), i;
+int open_max = sysconf(_SC_OPEN_MAX);
+int i;
 
 for (i = 3; i < open_max; i++) {
 if (i != fd) {
 close(i);
 }
 }
-parg = args;
-*parg++ = (char *)setup_script;
-*parg++ = (char *)ifname;
-*parg = NULL;
-execv(setup_script, args);
+argv[0] = setup_script;
+argv[1] = ifname;
+argv[2] = NULL;
+execv(setup_script, (char *const *)argv);
 _exit(1);
 } else {
 while (waitpid(pid, &status, 0) != pid) {
@@ -478,8 +477,7 @@ static int net_bridge_run_helper(const char *helper, const 
char *bridge,
 {
 sigset_t oldmask, mask;
 int pid, status;
-char *args[5];
-char **parg;
+const char *argv[5];
 int sv[2];
 
 sigemptyset(&mask);
@@ -498,10 +496,9 @@ static int net_bridge_run_helper(const char *helper, const 
char *bridge,
 return -1;
 }
 if (pid == 0) {
-int open_max = sysconf(_SC_OPEN_MAX), i;
-char fd_buf[6+10];
-

Re: [Qemu-devel] [PATCH v2 4/8] hw/core: Add a config switch for the "register" device

On 7/31/19 9:56 AM, Thomas Huth wrote:
> The "register" device is only used by certain machines. Let's add
> a proper config switch for it so that it only gets compiled when we
> really need it.
> 
> Signed-off-by: Thomas Huth 
> ---
>  hw/Kconfig| 1 +
>  hw/core/Kconfig   | 3 +++
>  hw/core/Makefile.objs | 2 +-
>  hw/dma/Kconfig| 1 +
>  4 files changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/Kconfig b/hw/Kconfig
> index dbae1c0852..b45db3c813 100644
> --- a/hw/Kconfig
> +++ b/hw/Kconfig
> @@ -76,3 +76,4 @@ config XILINX_AXI
>  
>  config XLNX_ZYNQMP
>  bool
> +select REGISTER
> diff --git a/hw/core/Kconfig b/hw/core/Kconfig
> index c2a1ae8122..d11920fcb3 100644
> --- a/hw/core/Kconfig
> +++ b/hw/core/Kconfig
> @@ -9,3 +9,6 @@ config FITLOADER
>  
>  config PLATFORM_BUS
>  bool
> +
> +config REGISTER

What about naming it REGISTER_ARRAY or REGISTER_BLOCK?

The API is:

RegisterInfoArray *register_init_block32(...);

Cc'ing Alistair for better name ideas :)

> +bool
> diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
> index f8481d959f..d6cfb2a81b 100644
> --- a/hw/core/Makefile.objs
> +++ b/hw/core/Makefile.objs
> @@ -17,7 +17,7 @@ common-obj-$(CONFIG_SOFTMMU) += machine.o
>  common-obj-$(CONFIG_SOFTMMU) += loader.o
>  common-obj-$(CONFIG_FITLOADER) += loader-fit.o
>  common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o
> -common-obj-$(CONFIG_SOFTMMU) += register.o
> +common-obj-$(CONFIG_REGISTER) += register.o
>  common-obj-$(CONFIG_SOFTMMU) += or-irq.o
>  common-obj-$(CONFIG_SOFTMMU) += split-irq.o
>  common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o
> diff --git a/hw/dma/Kconfig b/hw/dma/Kconfig
> index 751dec5426..5c61b67bc0 100644
> --- a/hw/dma/Kconfig
> +++ b/hw/dma/Kconfig
> @@ -16,6 +16,7 @@ config I8257
>  
>  config ZYNQ_DEVCFG
>  bool
> +select REGISTER
>  
>  config STP2000
>  bool
>

Re: [Qemu-devel] [PATCH v2 7/8] hw/misc: Add a config switch for the "unimplemented" device

On 7/31/19 9:56 AM, Thomas Huth wrote:
> The device is only used by some few boards. Let's use a proper Kconfig
> switch so that we only compile this code if we really need it.

I'd prefer having the UnimpDevice user-creatable and always present as a
core device...

Maybe this can be solved by using 'default y':

config UNIMP
bool
default y

> Signed-off-by: Thomas Huth 
> ---
>  hw/arm/Kconfig| 9 +
>  hw/microblaze/Kconfig | 1 +
>  hw/misc/Kconfig   | 3 +++
>  hw/misc/Makefile.objs | 2 +-
>  hw/sparc64/Kconfig| 1 +
>  5 files changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
> index 6e24c73b54..ab9e592d74 100644
> --- a/hw/arm/Kconfig
> +++ b/hw/arm/Kconfig
> @@ -217,6 +217,7 @@ config STELLARIS
>  select SSI_SD
>  select STELLARIS_INPUT
>  select STELLARIS_ENET # ethernet
> +select UNIMP
>  
>  config STRONGARM
>  bool
> @@ -283,6 +284,7 @@ config ALLWINNER_A10
>  select ALLWINNER_A10_PIC
>  select ALLWINNER_EMAC
>  select SERIAL
> +select UNIMP
>  
>  config RASPI
>  bool
> @@ -320,6 +322,7 @@ config XLNX_VERSAL
>  select PL011
>  select CADENCE
>  select VIRTIO_MMIO
> +select UNIMP
>  
>  config FSL_IMX25
>  bool
> @@ -355,6 +358,7 @@ config ASPEED_SOC
>  select SSI_M25P80
>  select TMP105
>  select TMP421
> +select UNIMP
>  
>  config MPS2
>  bool
> @@ -378,6 +382,7 @@ config FSL_IMX7
>  select IMX_I2C
>  select PCI_EXPRESS_DESIGNWARE
>  select SDHCI
> +select UNIMP
>  
>  config ARM_SMMUV3
>  bool
> @@ -389,6 +394,7 @@ config FSL_IMX6UL
>  select IMX_FEC
>  select IMX_I2C
>  select SDHCI
> +select UNIMP
>  
>  config MICROBIT
>  bool
> @@ -398,6 +404,7 @@ config NRF51_SOC
>  bool
>  select I2C
>  select ARM_V7M
> +select UNIMP
>  
>  config EMCRAFT_SF2
>  bool
> @@ -410,6 +417,7 @@ config MSF2
>  select PTIMER
>  select SERIAL
>  select SSI
> +select UNIMP
>  
>  config ZAURUS
>  bool
> @@ -448,6 +456,7 @@ config ARMSSE
>  select TZ_MPC
>  select TZ_MSC
>  select TZ_PPC
> +select UNIMP
>  
>  config ARMSSE_CPUID
>  bool
> diff --git a/hw/microblaze/Kconfig b/hw/microblaze/Kconfig
> index c4dc120973..e2697ced9c 100644
> --- a/hw/microblaze/Kconfig
> +++ b/hw/microblaze/Kconfig
> @@ -4,6 +4,7 @@ config PETALOGIX_S3ADSP1800
>  select XILINX
>  select XILINX_AXI
>  select XILINX_ETHLITE
> +select UNIMP
>  
>  config PETALOGIX_ML605
>  bool
> diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig
> index 385e1b0cec..51754bb47c 100644
> --- a/hw/misc/Kconfig
> +++ b/hw/misc/Kconfig
> @@ -117,4 +117,7 @@ config AUX
>  bool
>  select I2C
>  
> +config UNIMP
> +bool
> +
>  source macio/Kconfig
> diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
> index e9aab519a1..e4aad707fb 100644
> --- a/hw/misc/Makefile.objs
> +++ b/hw/misc/Makefile.objs
> @@ -9,7 +9,7 @@ common-obj-$(CONFIG_PCI_TESTDEV) += pci-testdev.o
>  common-obj-$(CONFIG_EDU) += edu.o
>  common-obj-$(CONFIG_PCA9552) += pca9552.o
>  
> -common-obj-y += unimp.o
> +common-obj-$(CONFIG_UNIMP) += unimp.o
>  common-obj-$(CONFIG_FW_CFG_DMA) += vmcoreinfo.o
>  
>  # ARM devices
> diff --git a/hw/sparc64/Kconfig b/hw/sparc64/Kconfig
> index d4d76a89be..f9f8b0f73a 100644
> --- a/hw/sparc64/Kconfig
> +++ b/hw/sparc64/Kconfig
> @@ -17,3 +17,4 @@ config NIAGARA
>  bool
>  select EMPTY_SLOT
>  select SUN4V_RTC
> +select UNIMP
>

Re: [Qemu-devel] [PATCH v2 7/8] hw/misc: Add a config switch for the "unimplemented" device

On 31/07/2019 13.47, Philippe Mathieu-Daudé wrote:
> On 7/31/19 9:56 AM, Thomas Huth wrote:
>> The device is only used by some few boards. Let's use a proper Kconfig
>> switch so that we only compile this code if we really need it.
> 
> I'd prefer having the UnimpDevice user-creatable and always present as a
> core device...

Why? This is a sysbus device that needs to be wired up in code, so I
don't see a reason for making this user-creatable right now.

Anyway, that's a different subject, so this should not be part of this
patch.

 Thomas

Re: [Qemu-devel] [PATCH 3/3] i386/kvm: initialize struct at full before ioctl call

2019-07-31 Thread Andrey Shinkevich

On 31/07/2019 10:24, Christian Borntraeger wrote:
> 
> 
> On 30.07.19 21:20, Paolo Bonzini wrote:
>> On 30/07/19 18:01, Andrey Shinkevich wrote:
>>> Not the whole structure is initialized before passing it to the KVM.
>>> Reduce the number of Valgrind reports.
>>>
>>> Signed-off-by: Andrey Shinkevich 
>>
>> Christian, is this the right fix?  It's not expensive so it wouldn't be
>> an issue, just checking if there's any better alternative.
> 
> I think all of these variants are valid with pros and cons
> 1. teach valgrind about this:
> Add to coregrind/m_syswrap/syswrap-linux.c (and the relevant header files)
> knowledge about which parts are actually touched.
> 2. use designated initializers
> 3. use memset
> 4. use a valgrind callback VG_USERREQ__MAKE_MEM_DEFINED to tell that this 
> memory is defined
> 

Thank you all very much for taking part in the discussion.
Also, one may use Valgrind's suppression mechanism to silence the unwanted 
reports by adding a Valgrind-specific suppression file, valgrind.supp, to the 
QEMU project. The file content can be extended for future needs.
All the cases we would like to suppress will be listed in that file.
Each case looks like a stack fragment. For instance, from the QEMU block layer:

{
hw/block/hd-geometry.c
Memcheck:Cond
fun:guess_disk_lchs
fun:hd_geometry_guess
fun:blkconf_geometry
...
fun:device_set_realized
fun:property_set_bool
fun:object_property_set
fun:object_property_set_qobject
fun:object_property_set_bool
}

The number of suppressed cases is reported by Valgrind with every 
run: "ERROR SUMMARY: 5 errors from 3 contexts (suppressed: 0 from 0)"

Andrey

>>
>> Paolo
>>
>>> ---
>>>   target/i386/kvm.c | 3 +++
>>>   1 file changed, 3 insertions(+)
>>>
>>> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
>>> index dbbb137..ed57e31 100644
>>> --- a/target/i386/kvm.c
>>> +++ b/target/i386/kvm.c
>>> @@ -190,6 +190,7 @@ static int kvm_get_tsc(CPUState *cs)
>>>   return 0;
>>>   }
>>>   
>>> +memset(&msr_data, 0, sizeof(msr_data));
>>>   msr_data.info.nmsrs = 1;
>>>   msr_data.entries[0].index = MSR_IA32_TSC;
>>>   env->tsc_valid = !runstate_is_running();
>>> @@ -1706,6 +1707,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
>>>   
>>>   if (has_xsave) {
>>>   env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
>>> +memset(env->xsave_buf, 0, sizeof(struct kvm_xsave));
>>>   }
>>>   
>>>   max_nested_state_len = kvm_max_nested_state_length();
>>> @@ -3477,6 +3479,7 @@ static int kvm_put_debugregs(X86CPU *cpu)
>>>   return 0;
>>>   }
>>>   
>>> +memset(&dbgregs, 0, sizeof(dbgregs));
>>>   for (i = 0; i < 4; i++) {
>>>   dbgregs.db[i] = env->dr[i];
>>>   }
>>> -- 
>>> 1.8.3.1
>>>
>>
>>
> 

-- 
With the best regards,
Andrey Shinkevich

Re: [Qemu-devel] [PATCH v3 1/9] block: add .bdrv_need_rw_file_child_during_reopen_rw handler

On 25.07.19 11:18, Vladimir Sementsov-Ogievskiy wrote:
> On reopen to rw parent may need rw access to child in .prepare, for
> example qcow2 needs to write IN_USE flags into stored bitmaps
> (currently it is done in a hacky way after commit and don't work).
> So, let's introduce such logic.
> 
> The drawback is that in worst case bdrv_reopen_set_read_only may finish
> with error and in some intermediate state: some nodes reopened RW and
> some are not. But this is a way to fix bug around reopening qcow2
> bitmaps in the following commits.

This commit message doesn’t really explain what this patch does.

> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  include/block/block_int.h |   2 +
>  block.c   | 144 ++
>  2 files changed, 133 insertions(+), 13 deletions(-)
> 
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 3aa1e832a8..7bd6fd68dd 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -531,6 +531,8 @@ struct BlockDriver {
>   uint64_t parent_perm, uint64_t parent_shared,
>   uint64_t *nperm, uint64_t *nshared);
>  
> + bool (*bdrv_need_rw_file_child_during_reopen_rw)(BlockDriverState *bs);
> +
>  /**
>   * Bitmaps should be marked as 'IN_USE' in the image on reopening image
>   * as rw. This handler should realize it. It also should unset readonly
> diff --git a/block.c b/block.c
> index cbd8da5f3b..3c8e1c59b4 100644
> --- a/block.c
> +++ b/block.c
> @@ -1715,10 +1715,12 @@ static void bdrv_get_cumulative_perm(BlockDriverState 
> *bs, uint64_t *perm,
>   uint64_t *shared_perm);
>  
>  typedef struct BlockReopenQueueEntry {
> - bool prepared;
> - bool perms_checked;
> - BDRVReopenState state;
> - QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
> +bool reopened_file_child_rw;
> +bool changed_file_child_perm_rw;
> +bool prepared;
> +bool perms_checked;
> +BDRVReopenState state;
> +QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
>  } BlockReopenQueueEntry;
>  
>  /*
> @@ -3421,6 +3423,105 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue 
> *bs_queue,
> keep_old_opts);
>  }
>  
> +static int bdrv_reopen_set_read_only_drained(BlockDriverState *bs,
> + bool read_only,
> + Error **errp)
> +{
> +BlockReopenQueue *queue;
> +QDict *opts = qdict_new();
> +
> +qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
> +
> +queue = bdrv_reopen_queue(NULL, bs, opts, true);
> +
> +return bdrv_reopen_multiple(queue, errp);
> +}
> +
> +/*
> + * handle_recursive_reopens
> + *
> + * On fail it needs rollback_recursive_reopens to be called.

It would be nice if this description actually said anything about what
the function is supposed to do.

> + */
> +static int handle_recursive_reopens(BlockReopenQueueEntry *current,
> +Error **errp)
> +{
> +int ret;
> +BlockDriverState *bs = current->state.bs;
> +
> +/*
> + * We use the fact that in reopen-queue children are always following
> + * parents.
> + * TODO: Switch BlockReopenQueue to be QTAILQ and use
> + *   QTAILQ_FOREACH_REVERSE.

Why don’t you do that first?  It would make the code more obvious at
least to me.

> + */
> +if (QSIMPLEQ_NEXT(current, entry)) {
> +ret = handle_recursive_reopens(QSIMPLEQ_NEXT(current, entry), errp);
> +if (ret < 0) {
> +return ret;
> +}
> +}
> +
> +if ((current->state.flags & BDRV_O_RDWR) && bs->file && bs->drv &&
> +bs->drv->bdrv_need_rw_file_child_during_reopen_rw &&
> +bs->drv->bdrv_need_rw_file_child_during_reopen_rw(bs))
> +{
> +if (!bdrv_is_writable(bs->file->bs)) {
> +ret = bdrv_reopen_set_read_only_drained(bs->file->bs, false, 
> errp);

Hm.  Sorry, I find this all a bit hard to understand.  (No comments and
all.)

I understand that this is for an RO -> RW transition?  Everything is
still RO, but the parent will need an RW child before it transitions to
RW itself.


I’m going to be honest up front, I don’t like this very much.  But I
think it may be a reasonable solution for now.

As I remember, the problem was that when reopening a qcow2 node from RO
to RW, we need to write something in .prepare() (because it can fail),
but naturally no .prepare() is called after any .commit(), so no matter
the order of nodes in the ReopenQueue, the child node will never be RW
by this point.

Hm.  To me that mostly means that making the whole reopen process a
transaction was just a dream that turns out not to work.

OK, so what would be the real, proper, all-encompassing fix?  I suppose
we’d need a way to express reopen dependency relationships.  So if a
node depends on one or more of its children to

Re: [Qemu-devel] [RFC] virtio-mmio: implement modern (v2) personality (virtio-1)

2019-07-31 Thread Sergio Lopez


Michael S. Tsirkin  writes:

> On Mon, Jul 29, 2019 at 02:57:55PM +0200, Sergio Lopez wrote:
>> Implement the modern (v2) personality, according to the VirtIO 1.0
>> specification.
>> 
>> Support for v2 among guests is not as widespread as it'd be
>> desirable. While the Linux driver has had it for a while, support is
>> missing, at least, from Tianocore EDK II, NetBSD and FreeBSD.
>
> The fact that there was no open source hypervisor implementation has
> probably contributed to this :)
>
>> For this reason, the v2 personality is disabled, keeping the legacy
>> behavior as default.
>
> I agree it's a good default for existing machine types.
>
>> Machine types willing to use v2, can enable it
>> using MachineClass's compat_props.
>
> Hmm. Are compat_props really the recommended mechanism to
> tweak defaults? I was under the impression it's
> only for compatibility with old machine types.
> Eduardo, any opinion on this?

Stefan suggested using something like "-global virtio-mmio.modern=true"
which does the trick for the command line, but I'd also like a way to
set it to true by default on microvm. We can discuss the best way to
achieve that (if using compat_props isn't acceptable) on the next
microvm patch series.

>> 
>> Signed-off-by: Sergio Lopez 
>
> Endian-ness seems to be wrong:
>
> static const MemoryRegionOps virtio_mem_ops = {
> .read = virtio_mmio_read,
> .write = virtio_mmio_write,
> .endianness = DEVICE_NATIVE_ENDIAN,
> };
>
> you will see this if you test a big endian guest.

Interesting, a Linux kernel compiled for aarch64_be works just
fine. Looking further, seems like, on ARM, Linux assumes all memory I/O
operations are little-endian and swaps the data if necessary:

arch/arm64/include/asm/io.h:
/*
 * Relaxed I/O memory access primitives. These follow the Device memory
 * ordering rules but do not guarantee any ordering relative to Normal memory
 * accesses.
 */
#define readb_relaxed(c)({ u8  __r = __raw_readb(c); __r; })
#define readw_relaxed(c)({ u16 __r = le16_to_cpu((__force 
__le16)__raw_readw(c)); __r; })
#define readl_relaxed(c)({ u32 __r = le32_to_cpu((__force 
__le32)__raw_readl(c)); __r; })
#define readq_relaxed(c)({ u64 __r = le64_to_cpu((__force 
__le64)__raw_readq(c)); __r; })

#define writeb_relaxed(v,c) ((void)__raw_writeb((v),(c)))
#define writew_relaxed(v,c) ((void)__raw_writew((__force 
u16)cpu_to_le16(v),(c)))
#define writel_relaxed(v,c) ((void)__raw_writel((__force 
u32)cpu_to_le32(v),(c)))
#define writeq_relaxed(v,c) ((void)__raw_writeq((__force 
u64)cpu_to_le64(v),(c)))

Appendix X of the virtio-0.9.5 spec states that "The endianness of the
registers follows the native endianness of the Guest". Luckily for us,
this isn't the case, as with DEVICE_NATIVE_ENDIAN
memory.c:adjust_endianness doesn't attempt any kind of transformation.

In any case, I guess we should follow the spec, and keep DEVICE_NATIVE_ENDIAN
for the legacy mode while using DEVICE_LITTLE_ENDIAN for
virtio-mmio-2/virtio-1.

>> ---
>>  hw/virtio/virtio-mmio.c | 264 ++--
>>  1 file changed, 254 insertions(+), 10 deletions(-)
>> 
>> diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
>> index 97b7f35496..1da841336f 100644
>> --- a/hw/virtio/virtio-mmio.c
>> +++ b/hw/virtio/virtio-mmio.c
>> @@ -47,14 +47,24 @@
>>  OBJECT_CHECK(VirtIOMMIOProxy, (obj), TYPE_VIRTIO_MMIO)
>>  
>>  #define VIRT_MAGIC 0x74726976 /* 'virt' */
>> -#define VIRT_VERSION 1
>> +#define VIRT_VERSION_LEGACY 1
>> +#define VIRT_VERSION_MODERN 2
>>  #define VIRT_VENDOR 0x554D4551 /* 'QEMU' */
>>  
>> +typedef struct VirtIOMMIOQueue {
>> +uint16_t num;
>> +bool enabled;
>> +uint32_t desc[2];
>> +uint32_t avail[2];
>> +uint32_t used[2];
>> +} VirtIOMMIOQueue;
>> +
>>  typedef struct {
>>  /* Generic */
>>  SysBusDevice parent_obj;
>>  MemoryRegion iomem;
>>  qemu_irq irq;
>> +bool modern;
>>  /* Guest accessible state needing migration and reset */
>>  uint32_t host_features_sel;
>>  uint32_t guest_features_sel;
>> @@ -62,6 +72,9 @@ typedef struct {
>>  /* virtio-bus */
>>  VirtioBusState bus;
>>  bool format_transport_address;
>> +/* Fields only used for v2 (modern) devices */
>> +uint32_t guest_features[2];
>> +VirtIOMMIOQueue vqs[VIRTIO_QUEUE_MAX];
>>  } VirtIOMMIOProxy;
>>  
>>  static bool virtio_mmio_ioeventfd_enabled(DeviceState *d)
>> @@ -115,7 +128,11 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr 
>> offset, unsigned size)
>>  case VIRTIO_MMIO_MAGIC_VALUE:
>>  return VIRT_MAGIC;
>>  case VIRTIO_MMIO_VERSION:
>> -return VIRT_VERSION;
>> +if (proxy->modern) {
>> +return VIRT_VERSION_MODERN;
>> +} else {
>> +return VIRT_VERSION_LEGACY;
>> +}
>>  case VIRTIO_MMIO_VENDOR_ID:
>>  return VIRT_VENDOR;
>>  default:
>>

Re: [Qemu-devel] [PATCH v5 2/5] hw/block/pflash_cfi01: Use the correct READ_ARRAY value

2019-07-31 Thread Markus Armbruster

Philippe Mathieu-Daudé  writes:

> Hi Markus.
>
> On 7/16/19 5:12 PM, Markus Armbruster wrote:
>> "Dr. David Alan Gilbert"  writes:
>> 
>>> * Markus Armbruster (arm...@redhat.com) wrote:
 Philippe asked me to have a look at this one, so here goes.
>
> Thanks a lot for your careful analysis.
>
> I got scared the uh-oh you raised would get this series or rework of it
> still refused for the release, so I went for a quick-and-dirty bugfix.

This close to the release, minimal bug fix now and cleanup later makes
lots of sense.

> Since this bugfix got merged (as commit 3a283507c0347), I can now think
> again about how to properly fix this (if it is fixable...).
>
 Philippe Mathieu-Daudé  writes:

> In the document [*] the "Read Array Flowchart", the READ_ARRAY
> command has a value of 0xff.
>
> Use the correct value in the pflash model.
>
> There is no change of behavior in the guest, because:
> - when the guest were sending 0xFF, the reset_flash label
>   was setting the command value as 0x00
> - 0x00 was used internally for READ_ARRAY

 *Groan*

 Is this cleanup, or does it fix an observable bug?
>
> Well it depends where you stand.
>
> I have a few patches on top of this adding trace events (4.2 material)
> and while debugging it was not making sense with the CFI specs.
>
> 1/ The guest writes 0xFF to go in READ_ARRAY mode, the model report a
> warning and take the switch default case which calls pflash_reset(),
> which happens to set the flash in READ_ARRAY.

This one, in pflash_write()?

switch (pfl->wcycle) {
case 0:
...
--->case 0xff: /* Read array mode */
DPRINTF("%s: Read array mode\n", __func__);
goto reset_flash;
...
}
return;
...
 reset_flash:
trace_pflash_reset();
memory_region_rom_device_set_romd(&pfl->mem, true);
pfl->wcycle = 0;
pfl->cmd = 0;

I can't see a warning here.

Let's ignore the tracepoint.

Is the memory_region_rom_device_set_romd() appropriate for READ_ARRAY?

pfl->wcycle = 0 is a no-op.

pfl->cmd = 0 is part of the "use 0 instead of 0xFF to represent
READ_ARRAY state" madness.

By the way, use of tracing and DPRINTF() in the same .c is an
anti-pattern.  Care to convert the remaining DPRINTF() into tracepoints?
Feel free to delete useless ones, if any.

> 2/ Then a later series adds the CFI specs timings (like the CFI02
> model), because it would useful to test the UEFI Capsule Update feature
> with real-time behavior. For the 'Virt' pflash, the timing is disabled.
> Now running a non-Virt pflash, it becomes very slow because each time
> the guest goes into READ_ARRAY mode, the reset delay (which is the
> longest) occurs.

Feels like a latent bug.  Adding timing turns it into a real one.

> Talking with Laszlo, I figured for 1/ instead of fixing the model, I can
> display 0x00 as 0xFF and ignore the pflash_reset() when the caller is
> not system_reset(). Dirty again.
>
> For 2/ it is not that easy, it will depends if there is more interest
> from the UEFI community (Intel parallel NOR flashes are used on x86 and
> aarch64 UEFI platforms).
>
> If we justify 1/ and 2/ are not important, then it is simply a cleanup.

If it's a bug fix, have the commit message explain the bug.

If it's just cleanup, have the commit message say so.

> To keep migration with older versions behaving correctly, we
> decide to always migrate the READ_ARRAY as 0x00.
>
> If the CFI open standard decide to assign a new command of value
> 0x00, this model is flawed because it uses this value internally.
> If a guest eventually requires this new CFI feature, a different
> model will be required (or this same model but breaking backward
> migration). So it is safe to keep migrating READ_ARRAY as 0x00.

 We could perhaps keep migration working for "benign" device states, with
 judicious use of subsections.  We'll cross that bridge when we get to
 it.

> [*] "Common Flash Interface (CFI) and Command Sets"
> (Intel Application Note 646)
> Appendix B "Basic Command Set"
>
> Reviewed-by: John Snow 
> Reviewed-by: Alistair Francis 
> Regression-tested-by: Laszlo Ersek 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
> v3: Handle migrating the 'cmd' field.
> v4: Handle migrating to older QEMU (Dave)
> v5: Add a paragraph about why this model is flawed due to
> historically using READ_ARRAY as 0x00 (Dave, Peter).
>
> Since Laszlo stated he did not test migration [*], I'm keeping his
> test tag, because the change with v2 has no impact in the tests
> he ran.
>
> Likewise I'm keeping John and Alistair tags, but I'd like an extra
> review for the migration change, thanks!
>
> [*] https://lists.gnu.org/archive/html/qemu-devel/2019-07/msg00679.html
> ---
>  hw/block/pflash_cfi01.c | 57 ++---
>>

Re: [Qemu-devel] [PATCH 3/3] i386/kvm: initialize struct at full before ioctl call

2019-07-31 Thread Christian Borntraeger




On 31.07.19 14:04, Andrey Shinkevich wrote:
> On 31/07/2019 10:24, Christian Borntraeger wrote:
>>
>>
>> On 30.07.19 21:20, Paolo Bonzini wrote:
>>> On 30/07/19 18:01, Andrey Shinkevich wrote:
 Not the whole structure is initialized before passing it to the KVM.
 Reduce the number of Valgrind reports.

 Signed-off-by: Andrey Shinkevich 
>>>
>>> Christian, is this the right fix?  It's not expensive so it wouldn't be
>>> an issue, just checking if there's any better alternative.
>>
>> I think all of these variants are valid with pros and cons
>> 1. teach valgrind about this:
>> Add to coregrind/m_syswrap/syswrap-linux.c (and the relevant header files)
>> knowledge about which parts are actually touched.
>> 2. use designated initializers
>> 3. use memset
>> 4. use a valgrind callback VG_USERREQ__MAKE_MEM_DEFINED to tell that this 
>> memory is defined
>>
> 
> Thank you all very much for taking part in the discussion.
> Also, one may use the Valgrind technology to suppress the unwanted 
> reports by adding the Valgrind specific format file valgrind.supp to the 
> QEMU project. The file content is extendable for future needs.
> All the cases we like to suppress will be recounted in that file.
> A case looks like the stack fragments. For instance, from QEMU block:
> 
> {
> hw/block/hd-geometry.c
> Memcheck:Cond
> fun:guess_disk_lchs
> fun:hd_geometry_guess
> fun:blkconf_geometry
> ...
> fun:device_set_realized
> fun:property_set_bool
> fun:object_property_set
> fun:object_property_set_qobject
> fun:object_property_set_bool
> }
> 
> The number of suppressed cases are reported by the Valgrind with every 
> run: "ERROR SUMMARY: 5 errors from 3 contexts (suppressed: 0 from 0)"
> 
> Andrey

Yes, indeed that would be another variant. How performance critical are
the fixed locations? That might have an impact on what is the best solution.
From a cleanliness approach doing 1 (adding the ioctl definition to valgrind)
is certainly the most beautiful way. I did that in the past, look for example at

https://sourceware.org/git/?p=valgrind.git;a=commitdiff;h=c2baee9b7bf043702c130de0771a4df439fcf403
or 
https://sourceware.org/git/?p=valgrind.git;a=commitdiff;h=00a31dd3d1e7101b331c2c83fca6c666ba35d910

for examples. 


> 
>>>
>>> Paolo
>>>
 ---
   target/i386/kvm.c | 3 +++
   1 file changed, 3 insertions(+)

 diff --git a/target/i386/kvm.c b/target/i386/kvm.c
 index dbbb137..ed57e31 100644
 --- a/target/i386/kvm.c
 +++ b/target/i386/kvm.c
 @@ -190,6 +190,7 @@ static int kvm_get_tsc(CPUState *cs)
   return 0;
   }
   
 +memset(&msr_data, 0, sizeof(msr_data));
   msr_data.info.nmsrs = 1;
   msr_data.entries[0].index = MSR_IA32_TSC;
   env->tsc_valid = !runstate_is_running();
 @@ -1706,6 +1707,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
   
   if (has_xsave) {
   env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
 +memset(env->xsave_buf, 0, sizeof(struct kvm_xsave));
   }
   
   max_nested_state_len = kvm_max_nested_state_length();
 @@ -3477,6 +3479,7 @@ static int kvm_put_debugregs(X86CPU *cpu)
   return 0;
   }
   
 +memset(&dbgregs, 0, sizeof(dbgregs));
   for (i = 0; i < 4; i++) {
   dbgregs.db[i] = env->dr[i];
   }
 -- 
 1.8.3.1

>>>
>>>
>>
>

Re: [Qemu-devel] [PATCH 3/3] i386/kvm: initialize struct at full before ioctl call

On 31/07/19 11:05, Christophe de Dinechin wrote:
> 
> Christian Borntraeger writes:
> 
>> On 30.07.19 18:44, Philippe Mathieu-Daudé wrote:
>>> On 7/30/19 6:01 PM, Andrey Shinkevich wrote:
 Not the whole structure is initialized before passing it to the KVM.
 Reduce the number of Valgrind reports.

 Signed-off-by: Andrey Shinkevich 
 ---
  target/i386/kvm.c | 3 +++
  1 file changed, 3 insertions(+)

 diff --git a/target/i386/kvm.c b/target/i386/kvm.c
 index dbbb137..ed57e31 100644
 --- a/target/i386/kvm.c
 +++ b/target/i386/kvm.c
 @@ -190,6 +190,7 @@ static int kvm_get_tsc(CPUState *cs)
  return 0;
  }

 +memset(&msr_data, 0, sizeof(msr_data));
>>>
>>> I wonder the overhead of this one...
>>
>> Cant we use designated initializers like in
>>
>> commit bdfc8480c50a53d91aa9a513d23a84de0d5fbc86
>> Author: Christian Borntraeger 
>> AuthorDate: Thu Oct 30 09:23:41 2014 +0100
>> Commit: Paolo Bonzini 
>> CommitDate: Mon Dec 15 12:21:01 2014 +0100
>>
>> valgrind/i386: avoid false positives on KVM_SET_XCRS ioctl
>>
>> and others?
>>
>> This should minimize the impact.
> 
> Oh, when you talked about using designated initializers, I thought you
> were talking about fully initializing the struct, like so:

Yeah, that would be good too.  For now I'm applying Andrey's series though.

Paolo

> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> index dbbb13772a..3533870c43 100644
> --- a/target/i386/kvm.c
> +++ b/target/i386/kvm.c
> @@ -180,19 +180,20 @@ static int kvm_get_tsc(CPUState *cs)
>  {
>  X86CPU *cpu = X86_CPU(cs);
>  CPUX86State *env = &cpu->env;
> -struct {
> -struct kvm_msrs info;
> -struct kvm_msr_entry entries[1];
> -} msr_data;
>  int ret;
> 
>  if (env->tsc_valid) {
>  return 0;
>  }
> 
> -msr_data.info.nmsrs = 1;
> -msr_data.entries[0].index = MSR_IA32_TSC;
> -env->tsc_valid = !runstate_is_running();
> +struct {
> +struct kvm_msrs info;
> +struct kvm_msr_entry entries[1];
> +} msr_data = {
> +.info = { .nmsrs =  1 },
> +.entries = { [0] = { .index = MSR_IA32_TSC } }
> +};
> + env->tsc_valid = !runstate_is_running();
> 
>  ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
>  if (ret < 0) {
> 
> 
> This gives the compiler maximum opportunities to flag mistakes like
> initializing the same thing twice, and make it easier (read no smart
> optimizations) to initialize in one go. Moving the declaration past the
> 'if' also addresses Philippe's concern.
> 
>>>
  msr_data.info.nmsrs = 1;
  msr_data.entries[0].index = MSR_IA32_TSC;
  env->tsc_valid = !runstate_is_running();
 @@ -1706,6 +1707,7 @@ int kvm_arch_init_vcpu(CPUState *cs)

  if (has_xsave) {
  env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
 +memset(env->xsave_buf, 0, sizeof(struct kvm_xsave));
>>>
>>> OK
>>>
  }

  max_nested_state_len = kvm_max_nested_state_length();
 @@ -3477,6 +3479,7 @@ static int kvm_put_debugregs(X86CPU *cpu)
  return 0;
  }

 +memset(&dbgregs, 0, sizeof(dbgregs));
>>>
>>> OK
>>>
  for (i = 0; i < 4; i++) {
  dbgregs.db[i] = env->dr[i];
  }
>>>
>>> We could remove 'dbgregs.flags = 0;'
>>>
>>> Reviewed-by: Philippe Mathieu-Daudé 
>>>
> 
> 
> --
> Cheers,
> Christophe de Dinechin (IRC c3d)
>

Re: [Qemu-devel] [PATCH v2 4/8] hw/core: Add a config switch for the "register" device

On 31/07/19 13:44, Philippe Mathieu-Daudé wrote:
> What about naming it REGISTER_ARRAY or REGISTER_BLOCK?
> 
> The API is:
> 
> RegisterInfoArray *register_init_block32(...);
> 
> Cc'ing Alistair for better name ideas :)
> 

I think REGISTER is okay. :)

Paolo

Re: [Qemu-devel] [PATCH for-4.2 v2 0/8] Kconfig switches

On 31/07/19 09:56, Thomas Huth wrote:
> Here are some more Kconfig patches that clean up the switches of
> existing devices and introduce proper config switches for some
> other devices that were always enabled before.
> 
> v2:
>  - Included Philippe's patches to avoid a conflict with XLNX_ZYNQMP
>  - Don't rely on indirect dependencies, always "select XYZ" if it is
>adequate
>  - Added patch for the generic loader device
> 
> Philippe Mathieu-Daudé (3):
>   hw/Kconfig: Move the generic XLNX_ZYNQMP to the root hw/Kconfig
>   hw/intc: Only build the xlnx-iomod-intc device for the MicroBlaze PMU
>   hw/dma: Do not build the xlnx_dpdma device for the MicroBlaze machines
> 
> Thomas Huth (5):
>   hw/core: Add a config switch for the "register" device
>   hw/core: Add a config switch for the "or-irq" device
>   hw/core: Add a config switch for the "split-irq" device
>   hw/misc: Add a config switch for the "unimplemented" device
>   hw/core: Add a config switch for the generic loader device
> 
>  hw/Kconfig|  4 
>  hw/arm/Kconfig| 15 +++
>  hw/core/Kconfig   | 13 +
>  hw/core/Makefile.objs |  8 
>  hw/dma/Kconfig|  1 +
>  hw/dma/Makefile.objs  |  1 -
>  hw/intc/Makefile.objs |  2 +-
>  hw/microblaze/Kconfig |  1 +
>  hw/misc/Kconfig   |  3 +++
>  hw/misc/Makefile.objs |  2 +-
>  hw/pci-host/Kconfig   |  3 ++-
>  hw/sparc64/Kconfig|  1 +
>  hw/timer/Kconfig  |  3 ---
>  13 files changed, 46 insertions(+), 11 deletions(-)
> 

Looks good.  Peter, are you picking it?

Paolo

Re: [Qemu-devel] [PATCH 3/3] i386/kvm: initialize struct at full before ioctl call

2019-07-31 Thread Christian Borntraeger




On 31.07.19 14:28, Christian Borntraeger wrote:
> 
> 
> On 31.07.19 14:04, Andrey Shinkevich wrote:
>> On 31/07/2019 10:24, Christian Borntraeger wrote:
>>>
>>>
>>> On 30.07.19 21:20, Paolo Bonzini wrote:
 On 30/07/19 18:01, Andrey Shinkevich wrote:
> Not the whole structure is initialized before passing it to the KVM.
> Reduce the number of Valgrind reports.
>
> Signed-off-by: Andrey Shinkevich 

 Christian, is this the right fix?  It's not expensive so it wouldn't be
 an issue, just checking if there's any better alternative.
>>>
>>> I think all of these variants are valid with pros and cons
>>> 1. teach valgrind about this:
>>> Add to coregrind/m_syswrap/syswrap-linux.c (and the relevant header files)
>>> knowledge about which parts are actually touched.
>>> 2. use designated initializers
>>> 3. use memset
>>> 4. use a valgrind callback VG_USERREQ__MAKE_MEM_DEFINED to tell that this 
>>> memory is defined
>>>
>>
>> Thank you all very much for taking part in the discussion.
>> Also, one may use the Valgrind technology to suppress the unwanted 
>> reports by adding the Valgrind specific format file valgrind.supp to the 
>> QEMU project. The file content is extendable for future needs.
>> All the cases we like to suppress will be recounted in that file.
>> A case looks like the stack fragments. For instance, from QEMU block:
>>
>> {
>> hw/block/hd-geometry.c
>> Memcheck:Cond
>> fun:guess_disk_lchs
>> fun:hd_geometry_guess
>> fun:blkconf_geometry
>> ...
>> fun:device_set_realized
>> fun:property_set_bool
>> fun:object_property_set
>> fun:object_property_set_qobject
>> fun:object_property_set_bool
>> }
>>
>> The number of suppressed cases are reported by the Valgrind with every 
>> run: "ERROR SUMMARY: 5 errors from 3 contexts (suppressed: 0 from 0)"
>>
>> Andrey
> 
> Yes, indeed that would be another variant. How performance critical are
> the fixed locations? That might have an impact on what is the best solution.
> From a cleanliness approach doing 1 (adding the ioctl definition to valgrind)
> is certainly the most beautiful way. I did that in the past, look for example 
> at
> 
> https://sourceware.org/git/?p=valgrind.git;a=commitdiff;h=c2baee9b7bf043702c130de0771a4df439fcf403
> or 
> https://sourceware.org/git/?p=valgrind.git;a=commitdiff;h=00a31dd3d1e7101b331c2c83fca6c666ba35d910
> 
> for examples. 
> 
> 
>>

 Paolo

> ---
>   target/i386/kvm.c | 3 +++
>   1 file changed, 3 insertions(+)
>
> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> index dbbb137..ed57e31 100644
> --- a/target/i386/kvm.c
> +++ b/target/i386/kvm.c
> @@ -190,6 +190,7 @@ static int kvm_get_tsc(CPUState *cs)
>   return 0;
>   }
>   
> +memset(&msr_data, 0, sizeof(msr_data));
>   msr_data.info.nmsrs = 1;
>   msr_data.entries[0].index = MSR_IA32_TSC;
>   env->tsc_valid = !runstate_is_running();
> @@ -1706,6 +1707,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
>   
>   if (has_xsave) {
>   env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
> +memset(env->xsave_buf, 0, sizeof(struct kvm_xsave));

This is memsetting 4k? 
Yet another variant would be to use the RUNNING_ON_VALGRIND macro from
valgrind/valgrind.h to only memset for valgrind. But just using MAKE_MEM_DEFINED
from memcheck.h is simpler.

[Qemu-devel] [PATCH] riscv: rv32: Root page table address can be larger than 32-bit

2019-07-31 Thread Bin Meng

For RV32, the root page table's PPN has 22 bits hence its address
bits could be larger than the maximum bits that target_ulong is
able to represent. Use hwaddr instead.

Signed-off-by: Bin Meng 
---

 target/riscv/cpu_helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index e32b612..3150a6a 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -176,7 +176,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr 
*physical,
 
 *prot = 0;
 
-target_ulong base;
+hwaddr base;
 int levels, ptidxbits, ptesize, vm, sum;
 int mxr = get_field(env->mstatus, MSTATUS_MXR);
 
@@ -239,7 +239,7 @@ restart:
((1 << ptidxbits) - 1);
 
 /* check that physical address of PTE is legal */
-target_ulong pte_addr = base + idx * ptesize;
+hwaddr pte_addr = base + idx * ptesize;
 
 if (riscv_feature(env, RISCV_FEATURE_PMP) &&
 !pmp_hart_has_privs(env, pte_addr, sizeof(target_ulong),
-- 
2.7.4

Re: [Qemu-devel] [PATCH RESEND v8 02/11] numa: move numa global variable nb_numa_nodes into MachineState

2019-07-31 Thread Igor Mammedov

On Wed, 31 Jul 2019 09:12:00 +0800
Tao Xu  wrote:

> Add struct NumaState in MachineState and move existing numa global
> nb_numa_nodes(renamed as "num_nodes") into NumaState. And add variable
> numa_support into MachineClass to decide which submachines support NUMA.
> 
> Suggested-by: Igor Mammedov 
> Suggested-by: Eduardo Habkost 
> Signed-off-by: Tao Xu 

Reviewed-by: Igor Mammedov 

> ---
> 
> Changes in v8:
> - Add check if numa->numa_state is NULL in pxb_dev_realize_common
> - Use nb_nodes in spapr_populate_memory() (Igor)
> ---
>  exec.c  |  5 ++-
>  hw/acpi/aml-build.c |  3 +-
>  hw/arm/boot.c   |  4 +-
>  hw/arm/sbsa-ref.c   |  4 +-
>  hw/arm/virt-acpi-build.c| 10 +++--
>  hw/arm/virt.c   |  4 +-
>  hw/core/machine-hmp-cmds.c  | 12 --
>  hw/core/machine.c   | 14 +--
>  hw/core/numa.c  | 60 +
>  hw/i386/acpi-build.c|  2 +-
>  hw/i386/pc.c|  9 +++--
>  hw/mem/pc-dimm.c|  2 +
>  hw/pci-bridge/pci_expander_bridge.c |  8 +++-
>  hw/ppc/spapr.c  | 19 -
>  include/hw/acpi/aml-build.h |  2 +-
>  include/hw/boards.h |  1 +
>  include/sysemu/numa.h   | 10 -
>  17 files changed, 110 insertions(+), 59 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 3e78de3b8f..4fd6ec2bd0 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1749,6 +1749,7 @@ long qemu_minrampagesize(void)
>  long hpsize = LONG_MAX;
>  long mainrampagesize;
>  Object *memdev_root;
> +MachineState *ms = MACHINE(qdev_get_machine());
>  
>  mainrampagesize = qemu_mempath_getpagesize(mem_path);
>  
> @@ -1776,7 +1777,9 @@ long qemu_minrampagesize(void)
>   * so if its page size is smaller we have got to report that size 
> instead.
>   */
>  if (hpsize > mainrampagesize &&
> -(nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
> +(ms->numa_state == NULL ||
> + ms->numa_state->num_nodes == 0 ||
> + numa_info[0].node_memdev == NULL)) {
>  static bool warned;
>  if (!warned) {
>  error_report("Huge page support disabled (n/a for main 
> memory).");
> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> index 555c24f21d..63c1cae8c9 100644
> --- a/hw/acpi/aml-build.c
> +++ b/hw/acpi/aml-build.c
> @@ -1726,10 +1726,11 @@ void build_srat_memory(AcpiSratMemoryAffinity 
> *numamem, uint64_t base,
>   * ACPI spec 5.2.17 System Locality Distance Information Table
>   * (Revision 2.0 or later)
>   */
> -void build_slit(GArray *table_data, BIOSLinker *linker)
> +void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms)
>  {
>  int slit_start, i, j;
>  slit_start = table_data->len;
> +int nb_numa_nodes = ms->numa_state->num_nodes;
>  
>  acpi_data_push(table_data, sizeof(AcpiTableHeader));
>  
> diff --git a/hw/arm/boot.c b/hw/arm/boot.c
> index ba604f8277..d02d2dae85 100644
> --- a/hw/arm/boot.c
> +++ b/hw/arm/boot.c
> @@ -598,9 +598,9 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info 
> *binfo,
>  }
>  g_strfreev(node_path);
>  
> -if (nb_numa_nodes > 0) {
> +if (ms->numa_state != NULL && ms->numa_state->num_nodes > 0) {
>  mem_base = binfo->loader_start;
> -for (i = 0; i < nb_numa_nodes; i++) {
> +for (i = 0; i < ms->numa_state->num_nodes; i++) {
>  mem_len = numa_info[i].node_mem;
>  rc = fdt_add_memory_node(fdt, acells, mem_base,
>   scells, mem_len, i);
> diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
> index 2aba3c58c5..22847909bf 100644
> --- a/hw/arm/sbsa-ref.c
> +++ b/hw/arm/sbsa-ref.c
> @@ -144,6 +144,7 @@ static void create_fdt(SBSAMachineState *sms)
>  {
>  void *fdt = create_device_tree(&sms->fdt_size);
>  const MachineState *ms = MACHINE(sms);
> +int nb_numa_nodes = ms->numa_state->num_nodes;
>  int cpu;
>  
>  if (!fdt) {
> @@ -760,7 +761,7 @@ sbsa_ref_cpu_index_to_props(MachineState *ms, unsigned 
> cpu_index)
>  static int64_t
>  sbsa_ref_get_default_cpu_node_id(const MachineState *ms, int idx)
>  {
> -return idx % nb_numa_nodes;
> +return idx % ms->numa_state->num_nodes;
>  }
>  
>  static void sbsa_ref_instance_init(Object *obj)
> @@ -787,6 +788,7 @@ static void sbsa_ref_class_init(ObjectClass *oc, void 
> *data)
>  mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids;
>  mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props;
>  mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id;
> +mc->numa_mem_supported = true;
>  }
>  
>  static const TypeInfo sbsa_ref_info = {
> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> index 0afb372769..a2cc4b84fe 100644
> --- a/hw/arm/virt-acpi-build.c
> +++ b/hw

[Qemu-devel] [PATCH] riscv: hmp: Add a command to show virtual memory mappings

2019-07-31 Thread Bin Meng

This adds 'info mem' command for RISC-V, to show virtual memory
mappings that aids debugging.

Rather than showing every valid PTE, the command compacts the
output by merging all contiguous physical address mappings into
one block and only shows the merged block mapping details.

Signed-off-by: Bin Meng 
---

 hmp-commands-info.hx   |   2 +-
 target/riscv/Makefile.objs |   4 +
 target/riscv/monitor.c | 227 +
 3 files changed, 232 insertions(+), 1 deletion(-)
 create mode 100644 target/riscv/monitor.c

diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index c59444c..257ee7d 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -249,7 +249,7 @@ STEXI
 Show virtual to physical memory mappings.
 ETEXI
 
-#if defined(TARGET_I386)
+#if defined(TARGET_I386) || defined(TARGET_RISCV)
 {
 .name   = "mem",
 .args_type  = "",
diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
index b1c79bc..a8ceccd 100644
--- a/target/riscv/Makefile.objs
+++ b/target/riscv/Makefile.objs
@@ -1,5 +1,9 @@
 obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o 
gdbstub.o pmp.o
 
+ifeq ($(CONFIG_SOFTMMU),y)
+obj-y += monitor.o
+endif
+
 DECODETREE = $(SRC_PATH)/scripts/decodetree.py
 
 decode32-y = $(SRC_PATH)/target/riscv/insn32.decode
diff --git a/target/riscv/monitor.c b/target/riscv/monitor.c
new file mode 100644
index 000..30560ff
--- /dev/null
+++ b/target/riscv/monitor.c
@@ -0,0 +1,227 @@
+/*
+ * QEMU monitor for RISC-V
+ *
+ * Copyright (c) 2019 Bin Meng 
+ *
+ * RISC-V specific monitor commands implementation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "cpu_bits.h"
+#include "monitor/monitor.h"
+#include "monitor/hmp-target.h"
+
+#ifdef TARGET_RISCV64
+#define PTE_HEADER_FIELDS   "vaddrpaddr"\
+"size attr\n"
+#define PTE_HEADER_DELIMITER"  "\
+" ---\n"
+#else
+#define PTE_HEADER_FIELDS   "vaddrpaddrsize attr\n"
+#define PTE_HEADER_DELIMITER"   ---\n"
+#endif
+
+/* Perform linear address sign extension */
+static target_ulong addr_canonical(int va_bits, target_ulong addr)
+{
+#ifdef TARGET_RISCV64
+if (addr & (1UL << (va_bits - 1))) {
+addr |= (hwaddr)-(1L << va_bits);
+}
+#endif
+
+return addr;
+}
+
+static void print_pte_header(Monitor *mon)
+{
+monitor_printf(mon, PTE_HEADER_FIELDS);
+monitor_printf(mon, PTE_HEADER_DELIMITER);
+}
+
+static void print_pte(Monitor *mon, int va_bits, target_ulong vaddr,
+  hwaddr paddr, target_ulong size, int attr)
+{
+/* santity check on vaddr */
+if (vaddr >= (1UL << va_bits)) {
+return;
+}
+
+if (!size) {
+return;
+}
+
+monitor_printf(mon, TARGET_FMT_lx " " TARGET_FMT_plx " " TARGET_FMT_lx
+   " %c%c%c%c%c%c%c\n",
+   addr_canonical(va_bits, vaddr),
+   paddr, size,
+   attr & PTE_R ? 'r' : '-',
+   attr & PTE_W ? 'w' : '-',
+   attr & PTE_X ? 'x' : '-',
+   attr & PTE_U ? 'u' : '-',
+   attr & PTE_G ? 'g' : '-',
+   attr & PTE_A ? 'a' : '-',
+   attr & PTE_D ? 'd' : '-');
+}
+
+static void walk_pte(Monitor *mon, hwaddr base, target_ulong start,
+ int level, int ptidxbits, int ptesize, int va_bits,
+ hwaddr *vbase, hwaddr *pbase, hwaddr *last_paddr,
+ target_ulong *last_size, int *last_attr)
+{
+hwaddr pte_addr;
+hwaddr paddr;
+target_ulong pgsize;
+target_ulong pte;
+int ptshift;
+int attr;
+int idx;
+
+if (level < 0) {
+return;
+}
+
+ptshift = level * ptidxbits;
+pgsize = 1UL << (PGSHIFT + ptshift);
+
+for (idx = 0; idx < (1UL << ptidxbits); idx++) {
+pte_addr = base + idx * ptesize;
+cpu_physical_memory_read(pte_addr, &pte, ptesize);
+
+paddr = (pte >> PTE_PPN_SHIFT) << PGSHIFT;
+attr = pte & 0xff;
+
+/* PTE has to be valid */
+if (attr & PTE_V) {
+if (attr & (PTE_R | PTE_W | PTE_X)) {
+/*
+

Re: [Qemu-devel] [PATCH v2 0/3] require newer glib2 to enable autofree'ing of stack variables exiting scope

2019-07-31 Thread Marc-André Lureau

On Thu, Jul 25, 2019 at 12:44 PM Daniel P. Berrangé  wrote:
>
> Both GCC and CLang support a C extension attribute((cleanup)) which
> allows you to define a function that is invoked when a stack variable
> exits scope. This typically used to free the memory allocated to it,
> though you're not restricted to this. For example it could be used to
> unlock a mutex.
>
> We could use that functionality now, but the syntax is a bit ugly in
> plain C. Since version 2.44 of GLib, there have been a few macros to
> make it more friendly to use - g_autofree, g_autoptr and
> G_DEFINE_AUTOPTR_CLEANUP_FUNC.
>
>   https://developer.gnome.org/glib/stable/glib-Miscellaneous-Macros.html
>
>   https://blogs.gnome.org/desrt/2015/01/30/g_autoptr/
>
> The key selling point is that it simplifies the cleanup code paths,
> often eliminating the need to goto cleanup labels. This improves
> the readability of the code and makes it less likely that you'll
> leak memory accidentally.
>
> Inspired by seeing it added to glib, and used in systemd, Libvirt
> finally got around to adopting this in Feb 2019. Overall our
> experience with it has been favourable/positive, with the code
> simplification being very nice.
>
> The main caveats with it are
>
>  - Only works with GCC or CLang. We don't care as those are
>the only two compilers we declare support for.
>
>  - You must always initialize the variables when declared
>to ensure predictable behaviour when they leave scope.
>Chances are most methods with goto jumps for cleanup
>are doing this already
>
>  - You must not directly return the value that's assigned
>to a auto-cleaned variable. You must steal the pointer
>in some way. ie
>
> BAD:
> g_autofree char *wibble = g_strdup("wibble")
> 
> return wibble;
>
> GOOD:
> g_autofree char *wibble = g_strdup("wibble")
> ...
> return g_steal_pointer(wibble);
>
> g_steal_pointer is an inline function which simply copies
> the pointer to a new variable, and sets the original variable
> to NULL, thus avoiding cleanup.
>
> I've illustrated the usage by converting a bunch of the crypto code in
> QEMU to use auto cleanup.
>
> Changed on v2:
>
>  - Actually commit the rest of the changes to patch 3 so that what's
>posted works :-) Sigh.
>
> Daniel P. Berrangé (3):
>   glib: bump min required glib library version to 2.48
>   crypto: define cleanup functions for use with g_autoptr
>   crypto: use auto cleanup for many stack variables

Series:
Reviewed-by: Marc-André Lureau 

>
>  configure   |  2 +-
>  crypto/afsplit.c| 28 --
>  crypto/block-luks.c | 74 +++--
>  crypto/block.c  | 15 +++-
>  crypto/hmac-glib.c  |  5 ---
>  crypto/pbkdf.c  |  5 +--
>  crypto/secret.c | 38 ---
>  crypto/tlscredsanon.c   | 16 +++-
>  crypto/tlscredspsk.c|  5 +--
>  crypto/tlscredsx509.c   | 16 +++-
>  include/crypto/block.h  |  2 +
>  include/crypto/cipher.h |  2 +
>  include/crypto/hmac.h   |  2 +
>  include/crypto/ivgen.h  |  2 +
>  include/crypto/tlssession.h |  2 +
>  include/glib-compat.h   | 42 +
>  16 files changed, 78 insertions(+), 178 deletions(-)
>
> --
> 2.21.0
>
>


-- 
Marc-André Lureau

Re: [Qemu-devel] [PATCH 3/3] i386/kvm: initialize struct at full before ioctl call

On 31/07/19 14:43, Christian Borntraeger wrote:
>>   if (has_xsave) {
>>   env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
>> +memset(env->xsave_buf, 0, sizeof(struct kvm_xsave));
> This is memsetting 4k? 
> Yet another variant would be to use the RUNNING_ON_VALGRIND macro from
> valgrind/valgrind.h to only memset for valgrind. But just using 
> MAKE_MEM_DEFINED
> from memcheck.h is simpler. 
> 

Yes, it's 4k but only at initialization time and I actually prefer not
to have potentially uninitialized host data in there.

Paolo

Re: [Qemu-devel] [PATCH v3] blockjob: drain all job nodes in block_job_drain

2019-07-31 Thread John Snow




On 7/31/19 6:28 AM, Vladimir Sementsov-Ogievskiy wrote:
> 30.07.2019 22:11, John Snow wrote:
>>
>>
>> On 7/24/19 5:40 AM, Vladimir Sementsov-Ogievskiy wrote:
>>> Instead of draining additional nodes in each job code, let's do it in
>>> common block_job_drain, draining just all job's children.
>>> BlockJobDriver.drain becomes unused, so, drop it at all.
>>>
>>> It's also a first step to finally get rid of blockjob->blk.
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
>>> ---
>>>
>>> v3: just resend, as I've some auto returned mails and not sure that
>>>  v2 reached recipients.
>>>
>>> v2: apply Max's suggestions:
>>>   - drop BlockJobDriver.drain
>>>   - do firtly loop of bdrv_drained_begin and then separate loop
>>> of bdrv_drained_end.
>>>
>>> Hmm, a question here: should I call bdrv_drained_end in reverse
>>> order? Or it's OK as is?
>>>
>>
>> I think it should be OK. These nodes don't necessarily have a well
>> defined relationship between each other, do they?
>>
>>>   include/block/blockjob_int.h | 11 ---
>>>   block/backup.c   | 18 +-
>>>   block/mirror.c   | 26 +++---
>>>   blockjob.c   | 13 -
>>>   4 files changed, 12 insertions(+), 56 deletions(-)
>>>
>>
>> Nice diffstat :)
>>
>>> diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
>>> index e4a318dd15..e1abf4ee85 100644
>>> --- a/include/block/blockjob_int.h
>>> +++ b/include/block/blockjob_int.h
>>> @@ -52,17 +52,6 @@ struct BlockJobDriver {
>>>* besides job->blk to the new AioContext.
>>>*/
>>>   void (*attached_aio_context)(BlockJob *job, AioContext *new_context);
>>> -
>>> -/*
>>> - * If the callback is not NULL, it will be invoked when the job has to 
>>> be
>>> - * synchronously cancelled or completed; it should drain 
>>> BlockDriverStates
>>> - * as required to ensure progress.
>>> - *
>>> - * Block jobs must use the default implementation for job_driver.drain,
>>> - * which will in turn call this callback after doing generic block job
>>> - * stuff.
>>> - */
>>> -void (*drain)(BlockJob *job);
>>
>> I was about to say "huh?" ... but then realized you're deleting this
>> confusing glob. Good.
>>
>>>   };
>>>   
>>>   /**
>>> diff --git a/block/backup.c b/block/backup.c
>>> index 715e1d3be8..7930004bbd 100644
>>> --- a/block/backup.c
>>> +++ b/block/backup.c
>>> @@ -320,21 +320,6 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
>>>   hbitmap_set(backup_job->copy_bitmap, 0, backup_job->len);
>>>   }
>>>   
>>> -static void backup_drain(BlockJob *job)
>>> -{
>>> -BackupBlockJob *s = container_of(job, BackupBlockJob, common);
>>> -
>>> -/* Need to keep a reference in case blk_drain triggers execution
>>> - * of backup_complete...
>>> - */
>>> -if (s->target) {
>>> -BlockBackend *target = s->target;
>>> -blk_ref(target);
>>> -blk_drain(target);
>>> -blk_unref(target);
>>> -}
>>> -}
>>> -
>>
>> Adios ...
>>
>>>   static BlockErrorAction backup_error_action(BackupBlockJob *job,
>>>   bool read, int error)
>>>   {
>>> @@ -493,8 +478,7 @@ static const BlockJobDriver backup_job_driver = {
>>>   .commit = backup_commit,
>>>   .abort  = backup_abort,
>>>   .clean  = backup_clean,
>>> -},
>>> -.drain  = backup_drain,
>>> +}
>>>   };
>>>   
>>
>> This pleases the eyes.
>>
>>>   static int64_t backup_calculate_cluster_size(BlockDriverState *target,
>>> diff --git a/block/mirror.c b/block/mirror.c
>>> index 8cb75fb409..8456ccd89d 100644
>>> --- a/block/mirror.c
>>> +++ b/block/mirror.c
>>> @@ -644,14 +644,11 @@ static int mirror_exit_common(Job *job)
>>>   bdrv_ref(mirror_top_bs);
>>>   bdrv_ref(target_bs);
>>>   
>>> -/* Remove target parent that still uses BLK_PERM_WRITE/RESIZE before
>>> +/*
>>> + * Remove target parent that still uses BLK_PERM_WRITE/RESIZE before
>>
>> (Thanks, patchew...)
>>
>>>* inserting target_bs at s->to_replace, where we might not be able 
>>> to get
>>>* these permissions.
>>> - *
>>> - * Note that blk_unref() alone doesn't necessarily drop permissions 
>>> because
>>> - * we might be running nested inside mirror_drain(), which takes an 
>>> extra
>>> - * reference, so use an explicit blk_set_perm() first. */
>>> -blk_set_perm(s->target, 0, BLK_PERM_ALL, &error_abort);
>>> + */
>>>   blk_unref(s->target);
>>>   s->target = NULL;
>>>   
>>> @@ -1143,21 +1140,6 @@ static bool mirror_drained_poll(BlockJob *job)
>>>   return !!s->in_flight;
>>>   }
>>>   
>>> -static void mirror_drain(BlockJob *job)
>>> -{
>>> -MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
>>> -
>>> -/* Need to keep a reference in case blk_drain triggers execution

Re: [Qemu-devel] [PATCH 1/3] block/backup: deal with zero detection

2019-07-31 Thread John Snow




On 7/31/19 6:01 AM, Vladimir Sementsov-Ogievskiy wrote:
> 30.07.2019 21:40, John Snow wrote:
>>
>>
>> On 7/30/19 12:32 PM, Vladimir Sementsov-Ogievskiy wrote:
>>> We have detect_zeroes option, so at least for blockdev-backup user
>>> should define it if zero-detection is needed. For drive-backup leave
>>> detection enabled by default but do it through existing option instead
>>> of open-coding.
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
>>> ---
>>>   block/backup.c | 15 ++-
>>>   blockdev.c |  8 
>>>   2 files changed, 10 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/block/backup.c b/block/backup.c
>>> index 715e1d3be8..f4aaf08df3 100644
>>> --- a/block/backup.c
>>> +++ b/block/backup.c
>>> @@ -110,7 +110,10 @@ static int coroutine_fn 
>>> backup_cow_with_bounce_buffer(BackupBlockJob *job,
>>>   BlockBackend *blk = job->common.blk;
>>>   int nbytes;
>>>   int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
>>> -int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING 
>>> : 0;
>>> +int write_flags =
>>> +(job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0) |
>>> +(job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
>>> +
>>>   
>>>   assert(QEMU_IS_ALIGNED(start, job->cluster_size));
>>>   hbitmap_reset(job->copy_bitmap, start, job->cluster_size);
>>> @@ -128,14 +131,8 @@ static int coroutine_fn 
>>> backup_cow_with_bounce_buffer(BackupBlockJob *job,
>>>   goto fail;
>>>   }
>>>   
>>> -if (buffer_is_zero(*bounce_buffer, nbytes)) {
>>> -ret = blk_co_pwrite_zeroes(job->target, start,
>>> -   nbytes, write_flags | 
>>> BDRV_REQ_MAY_UNMAP);
>>> -} else {
>>> -ret = blk_co_pwrite(job->target, start,
>>> -nbytes, *bounce_buffer, write_flags |
>>> -(job->compress ? BDRV_REQ_WRITE_COMPRESSED : 
>>> 0));
>>> -}
>>> +ret = blk_co_pwrite(job->target, start, nbytes, *bounce_buffer,
>>> +write_flags);
>>>   if (ret < 0) {
>>>   trace_backup_do_cow_write_fail(job, start, ret);
>>>   if (error_is_read) {
>>> diff --git a/blockdev.c b/blockdev.c
>>> index 4d141e9a1f..a94d754504 100644
>>> --- a/blockdev.c
>>> +++ b/blockdev.c
>>> @@ -3434,7 +3434,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, 
>>> JobTxn *txn,
>>>   BlockJob *job = NULL;
>>>   BdrvDirtyBitmap *bmap = NULL;
>>>   AioContext *aio_context;
>>> -QDict *options = NULL;
>>> +QDict *options;
>>>   Error *local_err = NULL;
>>>   int flags, job_flags = JOB_DEFAULT;
>>>   int64_t size;
>>> @@ -3529,10 +3529,10 @@ static BlockJob *do_drive_backup(DriveBackup 
>>> *backup, JobTxn *txn,
>>>   goto out;
>>>   }
>>>   
>>> +options = qdict_new();
>>> +qdict_put_str(options, "discard", "unmap");
>>> +qdict_put_str(options, "detect-zeroes", "unmap");
>>>   if (backup->format) {
>>> -if (!options) {
>>> -options = qdict_new();
>>> -}
>>>   qdict_put_str(options, "driver", backup->format);
>>>   }
>>>   
>>>
>>
>> I'm less sure of this one personally. Is it right to always try to set
>> unmap on the target?
>>
>> I like the idea of removing special cases and handling things more
>> centrally though, but I'll want Max (or Kevin) to take a peek.
>>
>> --js
>>
> 
> 
> If nobody minds I'd agree with you to drop zero detecting from both backups.
> 

I'm not sure it's WRONG either!

Re: [Qemu-devel] [PATCH 0/3] backup fixes for 4.1?

2019-07-31 Thread John Snow




On 7/31/19 6:29 AM, Vladimir Sementsov-Ogievskiy wrote:
> 30.07.2019 21:41, John Snow wrote:
>>
>>
>> On 7/30/19 12:32 PM, Vladimir Sementsov-Ogievskiy wrote:
>>> Hi all!
>>>
>>> Here are two small fixes.
>>>
>>> 01 is not a degradation at all, so it's OK for 4.2
>>> 02 is degradation of 3.0, so it's possibly OK for 4.2 too,
>>> but it seems to be real bug and fix is very simple, so,
>>> may be 4.1 is better
>>>
>>> Or you may take the whole series to 4.1 if you want.
>>>
>>
>> I think (1) and (2) can go in for stable after review, but they're not
>> crucial for 4.1 especially at this late of a stage. Should be cataclysms
>> only right now.
>>
>> --js
>>
> 
> I can rebase it than on your bitmaps branch. Or, if we want it for stable, 
> maybe,
> I shouldn't?
> 

Good point. Keep it based on main and I'll slip it in at the beginning
of the staging queue.

--js

[Qemu-devel] [Bug 1838465] Re: qemu-system-x86_64 kernel panic 30% of the time starting up VM

2019-07-31 Thread _

There are problems reliably booting the VM using TCG, HAXM, and Hyper-V.
TCG fails the least often. Attached is a pic of the error using HAXM, a
lot of "BUG: soft lockup detect on CPU#x!".

I tried to add logging but nothing ever shows up in the log file. I
tried adding "-d cpu,guest_errors -D E:\log.txt" to the command but the
log file is always empty.

** Attachment added: "haxmkernelpanic.png"

https://bugs.launchpad.net/qemu/+bug/1838465/+attachment/5280157/+files/haxmkernelpanic.png

--
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1838465

Title:
qemu-system-x86_64 kernel panic 30% of the time starting up VM

Status in QEMU:
New

Bug description:
I have created a Fedora Core 5 x86_64 VM image. When I run the image
using QEMU on Windows the VM hangs while loading the kernel about 30%
of the time. I am trying to use this VM with a CI software, looking at
the history the build failed 27 out of 79 attempts. QEMU 3.0.0 is
installed on the CI machine. I have tried using the exact same image
using QEMU on Linux (Ubuntu) and found the image boots successfully every
time (40+ attempts). The VM image is fairly old; it was created using
QEMU 0.11.1.

I have tried multiple versions of QEMU on Windows: 0.11.1, 2.12.1, and
3.0.0; all of them fail randomly. I can reproduce the issue on several
different Windows 10 computers.

The command I am using to start the VM is “qemu-system-x86_64.exe -cpu
qemu64 -smp cores=2 -device e1000,netdev=net0 -boot menu=off -m 1G
-drive `"file=C:\qimages\Fedora-Core-5-x64.qcow2,index=0,media=disk`"
-snapshot -netdev user,id=net0,hostfwd=tcp::10022-:22”

I can provide the qcow image but it is somewhat large, coming in at
4.15GB so I’m not sure what would be the best way to transfer it.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1838465/+subscriptions

Re: [Qemu-devel] [PATCH 2/3] block/backup: disable copy_range for compressed backup

30.07.2019 21:22, John Snow wrote:
> 
> 
> On 7/30/19 12:32 PM, Vladimir Sementsov-Ogievskiy wrote:
>> Enabled by default copy_range ignores compress option. It's definitely
>> unexpected for user.
>>
>> It's broken since introduction of copy_range usage in backup in
>> 9ded4a011496.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
>> ---
>>   block/backup.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/block/backup.c b/block/backup.c
>> index f4aaf08df3..c5f941101a 100644
>> --- a/block/backup.c
>> +++ b/block/backup.c
>> @@ -645,7 +645,7 @@ BlockJob *backup_job_create(const char *job_id, 
>> BlockDriverState *bs,
>>   job->cluster_size = cluster_size;
>>   job->copy_bitmap = copy_bitmap;
>>   copy_bitmap = NULL;
>> -job->use_copy_range = true;
>> +job->use_copy_range = !compress; /* compression isn't supported for it 
>> */
>>   job->copy_range_size = 
>> MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
>>   blk_get_max_transfer(job->target));
>>   job->copy_range_size = MAX(job->cluster_size,
>>
> 
> Agree, these aren't compatible options. Is this worth a note in
> docs/interop/live-block-operations.rst?

use_copy_range is not a user-visible option and, as far as I can see, it is
not documented, so I think we don't need a note.

> 
> Reviewed-by: John Snow 
> 


-- 
Best regards,
Vladimir

Re: [Qemu-devel] [RFC] virtio-mmio: implement modern (v2) personality (virtio-1)

2019-07-31 Thread Cornelia Huck

On Tue, 30 Jul 2019 16:18:52 -0400
"Michael S. Tsirkin"  wrote:

> On Tue, Jul 30, 2019 at 03:14:00PM +0200, Cornelia Huck wrote:
> > On Tue, 30 Jul 2019 14:17:48 +0200
> > Andrea Bolognani  wrote:
> >   
> > > On Tue, 2019-07-30 at 13:35 +0200, Cornelia Huck wrote:  
> > > > On Tue, 30 Jul 2019 12:25:30 +0200
> > > > Andrea Bolognani  wrote:
> > > > > Can you please make sure virtio-mmio uses the existing interface
> > > > > instead of introducing a new one?
> > > > 
> > > > FWIW, I really hate virtio-pci's disable-modern/disable-legacy... for a
> > > > starter, what is 'modern'? Will we have 'ultra-modern' in the future?   
> > > >  
> > > 
> > > AIUI the modern/legacy terminology is part of the VirtIO spec, so
> > > while I agree that it's not necessarily the least prone to ambiguity
> > > at least it's well defined.  
> > 
> > Legacy is, modern isn't :) Devices/drivers are conforming to the
> > standard, I don't think there's a special term for that.  
> 
> Right, if we followed the spec, disable-modern would have been
> force-legacy.
> 
> I'm fine with adding force-legacy for everyone and asking tools to
> transition if there. Document it's same as disable-modern for pci.
> Cornelia?

'force-legacy' is certainly better than 'disable-modern'. Not sure if
it's much of a gain at this point in time, and it does not really add
anything over limiting the revision to 0 for ccw, but I don't really
object to it.

> 
> 
> > >   
> > > > It is also quite backwards with the 'disable' terminology.
> > > 
> > > That's also true. I never claimed the way virtio-pci does it is
> > > perfect!
> > >   
> > > > We also have a different mechanism for virtio-ccw ('max_revision',
> > > > which covers a bit more than virtio-1; it doesn't have a 'min_revision',
> > > > as negotiating the revision down is fine), so I don't see why
> > > > virtio-mmio should replicate the virtio-pci mechanism.
> > > > 
> > > > Also, IIUC, virtio-mmio does not have transitional devices, but either
> > > > version 1 (legacy) or version 2 (virtio-1). It probably makes more
> > > > sense to expose the device version instead; either as an exact version
> > > > (especially if it isn't supposed to go up without incompatible
> > > > changes), or with some min/max concept (where version 1 would stand a
> > > > bit alone, so that would probably be a bit awkward.)
> > > 
> > > I think that if reinventing the wheel is generally agreed not to be
> > > a good idea, then it stands to reason that reinventing it twice can
> > > only be described as absolute madness :)
> > > 
> > > We should have a single way to control the VirtIO protocol version
> > > that works for all VirtIO devices, regardless of transport. We might
> > > even want to have virtio-*-{device,ccw}-non-transitional to mirror
> > > the existing virtio-*-pci-non-transitional.
> > > 
> > > FWIW, libvirt already implements support for (non)-transitional
> > > virtio-pci devices using either the dedicated devices or the base
> > > virtio-pci plus the disable-{modern,legacy} attributes.  
> > 
> > One problem (besides my dislike of the existing virtio-pci
> > interfaces :) is that pci, ccw, and mmio all have slightly different
> > semantics.
> > 
> > - pci: If we need to keep legacy support around, we cannot enable some
> >   features (IIRC, pci-e, maybe others as well.) That means transitional
> >   devices are in some ways inferior to virtio-1 only devices, so it
> >   makes a lot of sense to be able to configure devices without legacy
> >   support. The differences between legacy and virtio-1 are quite large.
> > - ccw: Has revisions negotiated between device and driver; virtio-1
> >   requires revision 1 or higher. (Legacy drivers that don't know the
> >   concept of revisions automatically get revision 0.) Differences
> >   between legacy and virtio-1 are mostly virtqueue endianness and some
> >   control structures.
> > - mmio: Has device versions offered by the device, the driver can take
> >   it or leave it. No transitional devices. Differences don't look as
> >   large as the ones for pci, either.
> > 
> > So, if we were to duplicate the same scheme as for pci for ccw and mmio
> > as well, we'd get
> > 
> > - ccw: devices that support revision 0 only (disable-modern), that act
> >   as today, or that support at least revision 1 (disable-legacy). We
> >   still need to keep max_revision around for backwards compatibility.
> >   Legacy only makes sense for compat machines (although this is
> >   equivalent to max_revision 0); I don't see a reason why you would
> >   want virtio-1 only devices, unless you'd want to rip out legacy
> >   support in QEMU completely.  
> 
> Reduce security attack surface slightly. Save some cycles
> (down the road) on branches in the endian-ness handling.

Most of that stuff is actually in the core code, right? Ripping out
legacy will have much more impact outside of ccw, I guess.

> Make sure your guests
> are all up to date in preparation to the

Re: [Qemu-devel] [PATCH v2 0/3] require newer glib2 to enable autofree'ing of stack variables exiting scope



Daniel P. Berrangé  writes:

> Both GCC and CLang support a C extension attribute((cleanup)) which
> allows you to define a function that is invoked when a stack variable
> exits scope. This typically used to free the memory allocated to it,
> though you're not restricted to this. For example it could be used to
> unlock a mutex.

>
> GOOD:
> g_autofree char *wibble = g_strdup("wibble")
>   ...
>   return g_steal_pointer(wibble);
>
> g_steal_pointer is an inline function which simply copies
> the pointer to a new variable, and sets the original variable
> to NULL, thus avoiding cleanup.

Surely this is a particular use case where you wouldn't use g_autofree
to declare the variable, as you are intending to return it to the outer scope?

--
Alex Bennée

Re: [Qemu-devel] [PATCH v2 0/3] require newer glib2 to enable autofree'ing of stack variables exiting scope

2019-07-31 Thread Eric Blake

On 7/31/19 9:04 AM, Alex Bennée wrote:
> 
> Daniel P. Berrangé  writes:
> 
>> Both GCC and CLang support a C extension attribute((cleanup)) which
>> allows you to define a function that is invoked when a stack variable
>> exits scope. This typically used to free the memory allocated to it,
>> though you're not restricted to this. For example it could be used to
>> unlock a mutex.
> 
>>
>> GOOD:
>> g_autofree char *wibble = g_strdup("wibble")
>>  ...
>>  return g_steal_pointer(wibble);
>>
>> g_steal_pointer is an inline function which simply copies
>> the pointer to a new variable, and sets the original variable
>> to NULL, thus avoiding cleanup.
> 
> Surely this is a particular use case where you wouldn't use g_autofree
> to declare the variable as you intending to return it to the outer scope?

Actually, it's still quite useful if you have intermediate returns:

g_autofree char *wibble = NULL;
if (something)
  return NULL;
wibble = g_strdup("wibble");
if (something_else)
  return NULL;
return g_steal_pointer(wibble);

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v2 0/3] require newer glib2 to enable autofree'ing of stack variables exiting scope

2019-07-31 Thread Daniel P . Berrangé

On Wed, Jul 31, 2019 at 03:04:29PM +0100, Alex Bennée wrote:
> 
> Daniel P. Berrangé  writes:
> 
> > Both GCC and CLang support a C extension attribute((cleanup)) which
> > allows you to define a function that is invoked when a stack variable
> > exits scope. This typically used to free the memory allocated to it,
> > though you're not restricted to this. For example it could be used to
> > unlock a mutex.
> 
> >
> > GOOD:
> > g_autofree char *wibble = g_strdup("wibble")
> > ...
> > return g_steal_pointer(wibble);
> >
> > g_steal_pointer is an inline function which simply copies
> > the pointer to a new variable, and sets the original variable
> > to NULL, thus avoiding cleanup.
> 
> Surely this is a particular use case where you wouldn't use g_autofree
> to declare the variable as you intending to return it to the outer scope?

I think it depends on the situation. Obviously real code will have
something in the "..." part I snipped.

If you have 20 code paths that can result in returning with an error, where
you want to have all variables freed, and only 1 code path for success,
then it makes sense to use g_autofree + g_steal_pointer to eliminate
many goto jumps.

If you have only 1 error path and 1 success path, then a traditional
g_free() call may well be sufficient.

IOW, as with many coding "rules", there's scope to use personal
judgement as to when it is right to ignore it vs follow it.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-devel] [PATCH 3/3] i386/kvm: initialize struct at full before ioctl call

2019-07-31 Thread Andrey Shinkevich



On 31/07/2019 15:32, Paolo Bonzini wrote:
> On 31/07/19 11:05, Christophe de Dinechin wrote:
>>
>> Christian Borntraeger writes:
>>
>>> On 30.07.19 18:44, Philippe Mathieu-Daudé wrote:
 On 7/30/19 6:01 PM, Andrey Shinkevich wrote:
> Not the whole structure is initialized before passing it to the KVM.
> Reduce the number of Valgrind reports.
>
> Signed-off-by: Andrey Shinkevich 
> ---
>   target/i386/kvm.c | 3 +++
>   1 file changed, 3 insertions(+)
>
> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> index dbbb137..ed57e31 100644
> --- a/target/i386/kvm.c
> +++ b/target/i386/kvm.c
> @@ -190,6 +190,7 @@ static int kvm_get_tsc(CPUState *cs)
>   return 0;
>   }
>
> +memset(&msr_data, 0, sizeof(msr_data));

 I wonder the overhead of this one...
>>>
>>> Cant we use designated initializers like in
>>>
>>> commit bdfc8480c50a53d91aa9a513d23a84de0d5fbc86
>>> Author: Christian Borntraeger 
>>> AuthorDate: Thu Oct 30 09:23:41 2014 +0100
>>> Commit: Paolo Bonzini 
>>> CommitDate: Mon Dec 15 12:21:01 2014 +0100
>>>
>>>  valgrind/i386: avoid false positives on KVM_SET_XCRS ioctl
>>>
>>> and others?
>>>
>>> This should minimize the impact.
>>
>> Oh, when you talked about using designated initializers, I thought you
>> were talking about fully initializing the struct, like so:
> 
> Yeah, that would be good too.  For now I'm applying Andrey's series though.
> 
> Paolo
> 

Thank you.
As Philippe wrote, 'dbgregs.flags = 0;' is unnecessary with 'memset(0)'.

Andrey

>> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
>> index dbbb13772a..3533870c43 100644
>> --- a/target/i386/kvm.c
>> +++ b/target/i386/kvm.c
>> @@ -180,19 +180,20 @@ static int kvm_get_tsc(CPUState *cs)
>>   {
>>   X86CPU *cpu = X86_CPU(cs);
>>   CPUX86State *env = &cpu->env;
>> -struct {
>> -struct kvm_msrs info;
>> -struct kvm_msr_entry entries[1];
>> -} msr_data;
>>   int ret;
>>
>>   if (env->tsc_valid) {
>>   return 0;
>>   }
>>
>> -msr_data.info.nmsrs = 1;
>> -msr_data.entries[0].index = MSR_IA32_TSC;
>> -env->tsc_valid = !runstate_is_running();
>> +struct {
>> +struct kvm_msrs info;
>> +struct kvm_msr_entry entries[1];
>> +} msr_data = {
>> +.info = { .nmsrs =  1 },
>> +.entries = { [0] = { .index = MSR_IA32_TSC } }
>> +};
>> + env->tsc_valid = !runstate_is_running();
>>
>>   ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
>>   if (ret < 0) {
>>
>>
>> This gives the compiler maximum opportunities to flag mistakes like
>> initializing the same thing twice, and make it easier (read no smart
>> optimizations) to initialize in one go. Moving the declaration past the
>> 'if' also addresses Philippe's concern.
>>

>   msr_data.info.nmsrs = 1;
>   msr_data.entries[0].index = MSR_IA32_TSC;
>   env->tsc_valid = !runstate_is_running();
> @@ -1706,6 +1707,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
>
>   if (has_xsave) {
>   env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
> +memset(env->xsave_buf, 0, sizeof(struct kvm_xsave));

 OK

>   }
>
>   max_nested_state_len = kvm_max_nested_state_length();
> @@ -3477,6 +3479,7 @@ static int kvm_put_debugregs(X86CPU *cpu)
>   return 0;
>   }
>
> +memset(&dbgregs, 0, sizeof(dbgregs));

 OK

>   for (i = 0; i < 4; i++) {
>   dbgregs.db[i] = env->dr[i];
>   }

 We could remove 'dbgregs.flags = 0;'

 Reviewed-by: Philippe Mathieu-Daudé 

>>
>>
>> --
>> Cheers,
>> Christophe de Dinechin (IRC c3d)
>>
> 

-- 
With the best regards,
Andrey Shinkevich

Re: [Qemu-devel] [PATCH 3/3] i386/kvm: initialize struct at full before ioctl call

2019-07-31 Thread Andrey Shinkevich



On 31/07/2019 15:43, Christian Borntraeger wrote:
> 
> 
> On 31.07.19 14:28, Christian Borntraeger wrote:
>>
>>
>> On 31.07.19 14:04, Andrey Shinkevich wrote:
>>> On 31/07/2019 10:24, Christian Borntraeger wrote:


 On 30.07.19 21:20, Paolo Bonzini wrote:
> On 30/07/19 18:01, Andrey Shinkevich wrote:
>> Not the whole structure is initialized before passing it to the KVM.
>> Reduce the number of Valgrind reports.
>>
>> Signed-off-by: Andrey Shinkevich 
>
> Christian, is this the right fix?  It's not expensive so it wouldn't be
> an issue, just checking if there's any better alternative.

 I think all of these variants are valid with pros and cons
 1. teach valgrind about this:
 Add to coregrind/m_syswrap/syswrap-linux.c (and the relevant header files)
 knowledge about which parts are actually touched.
 2. use designated initializers
 3. use memset
 3. use a valgrind callback VG_USERREQ__MAKE_MEM_DEFINED to tell that this 
 memory is defined

>>>
>>> Thank you all very much for taking part in the discussion.
>>> Also, one may use the Valgrind technology to suppress the unwanted
>>> reports by adding the Valgrind specific format file valgrind.supp to the
>>> QEMU project. The file content is extendable for future needs.
>>> All the cases we like to suppress will be recounted in that file.
>>> A case looks like the stack fragments. For instance, from QEMU block:
>>>
>>> {
>>>  hw/block/hd-geometry.c
>>>  Memcheck:Cond
>>>  fun:guess_disk_lchs
>>>  fun:hd_geometry_guess
>>>  fun:blkconf_geometry
>>>  ...
>>>  fun:device_set_realized
>>>  fun:property_set_bool
>>>  fun:object_property_set
>>>  fun:object_property_set_qobject
>>>  fun:object_property_set_bool
>>> }
>>>
>>> The number of suppressed cases are reported by the Valgrind with every
>>> run: "ERROR SUMMARY: 5 errors from 3 contexts (suppressed: 0 from 0)"
>>>
>>> Andrey
>>
>> Yes, indeed that would be another variant. How performance critical are
>> the fixed locations? That might have an impact on what is the best solution.
>>  From a cleanliness approach doing 1 (adding the ioctl definition to 
>> valgrind)
>> is certainly the most beautiful way. I did that in the past, look for 
>> example at
>>
>> https://sourceware.org/git/?p=valgrind.git;a=commitdiff;h=c2baee9b7bf043702c130de0771a4df439fcf403
>> or
>> https://sourceware.org/git/?p=valgrind.git;a=commitdiff;h=00a31dd3d1e7101b331c2c83fca6c666ba35d910
>>
>> for examples.
>>
>>
>>>
>
> Paolo
>
>> ---
>>target/i386/kvm.c | 3 +++
>>1 file changed, 3 insertions(+)
>>
>> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
>> index dbbb137..ed57e31 100644
>> --- a/target/i386/kvm.c
>> +++ b/target/i386/kvm.c
>> @@ -190,6 +190,7 @@ static int kvm_get_tsc(CPUState *cs)
>>return 0;
>>}
>>
>> +memset(&msr_data, 0, sizeof(msr_data));
>>msr_data.info.nmsrs = 1;
>>msr_data.entries[0].index = MSR_IA32_TSC;
>>env->tsc_valid = !runstate_is_running();
>> @@ -1706,6 +1707,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
>>
>>if (has_xsave) {
>>env->xsave_buf = qemu_memalign(4096, sizeof(struct 
>> kvm_xsave));
>> +memset(env->xsave_buf, 0, sizeof(struct kvm_xsave));
> 
> This is memsetting 4k?
> Yet another variant would be to use the RUNNING_ON_VALGRIND macro from
> valgrind/valgrind.h to only memset for valgrind. But just using 
> MAKE_MEM_DEFINED
> from memcheck.h is simpler.
> 

So, on this assumption, the code would look like

#ifdef CONFIG_VALGRIND_H
#include <valgrind/memcheck.h>
#endif

#ifdef CONFIG_VALGRIND_H
 VALGRIND_MAKE_MEM_DEFINED(&msr_data, sizeof(msr_data));
#endif

etc.

Andrey
-- 
With the best regards,
Andrey Shinkevich

[Qemu-devel] [PATCH v3 00/18] ppc/pnv: add XIVE support for KVM guests

Hello,

The QEMU PowerNV machine emulates a baremetal OpenPOWER system and
acts as a hypervisor (L0). Supporting emulation of KVM to run guests
(L1) requires a few more extensions, among which guest support for the
XIVE interrupt controller on the POWER9 processor.

The following changes add new per-CPU PowerNV machines and extend the
XIVE models with the new XiveFabric and XivePresenter interfaces to
provide support for XIVE escalations and interrupt resend. This
mechanism is used by XIVE to notify the hypervisor that a vCPU is not
dispatched on a HW thread. Tested on a QEMU PowerNV machine and a
simple QEMU pseries guest doing network on a local bridge.

The XIVE interrupt controller offers a way to increase the XIVE
resources per chip by configuring multiple XIVE blocks on a chip. This
is not currently supported by the model. However, some configurations,
such as OPAL/skiboot, use one block-per-chip configuration with some
optimizations. One of them is to override the hardwired chip ID by the
block id in the PowerBUS operations and for CAM line compares. This
patchset improves the support for this setup. Tested with 4 chips.

A series from Suraj adding guest support in the Radix MMU model of the
QEMU PowerNV machine is still required and will be sent later. The
whole patchset can be found under :

  https://github.com/legoater/qemu/tree/powernv-4.1

Thanks,

C.

Changes since v2:

 - introduced the XiveFabric and XivePresenter interfaces
 - removed the need of a XiveRouter pointer under XiveTCTX

Changes since v1:

 - minor extra fixes 
 - split the escalation support in different patches
 - kept the XiveRouter type for XiveTCTX back pointer (will address
   this in P10)
 - removed pnv_xive_vst_size(). Really broken on indirect tables.
 - improved the dump of the NVT table
 - introduce pnv_xive_get_block_id()


Cédric Le Goater (18):
  ppc/pnv: Introduce PowerNV machines with fixed CPU models
  tests/boot-serial-test: add support for all the PowerNV machines
  ppc/xive: Introduce the XiveFabric and XivePresenter interfaces
  ppc/pnv: Implement the XiveFabric and XivePresenter interfaces
  ppc/spapr: Implement the XiveFabric and XivePresenter interfaces
  ppc/xive: Use the XiveFabric and XivePresenter interfaces
  ppc/xive: Extend the TIMA operation with a XivePresenter parameter
  ppc/pnv: Clarify how the TIMA is accessed on a multichip system
  ppc/xive: Move the TIMA operations to the controller model
  ppc/xive: Introduce a xive_tctx_ipb_update() helper
  ppc/xive: Synthesize interrupt from the saved IPB in the NVT
  ppc/pnv: Remove pnv_xive_vst_size() routine
  ppc/pnv: Dump the XIVE NVT table
  ppc/pnv: Skip empty slots of the XIVE NVT table
  ppc/pnv: Introduce a pnv_xive_block_id() helper
  ppc/pnv: Extend XivePresenter with a get_block_id() handler
  ppc/pnv: Quiesce some XIVE errors
  ppc/xive: Introduce a xive_os_cam_decode() helper

 include/hw/ppc/pnv.h   |  13 ++
 include/hw/ppc/pnv_xive.h  |   3 -
 include/hw/ppc/spapr_irq.h |   6 +
 include/hw/ppc/xive.h  |  71 ++--
 include/hw/ppc/xive_regs.h |  24 +++
 hw/intc/pnv_xive.c | 356 -
 hw/intc/spapr_xive.c   |  80 -
 hw/intc/xive.c | 319 ++---
 hw/ppc/pnv.c   | 102 ++-
 hw/ppc/spapr.c |  34 
 hw/ppc/spapr_irq.c |  25 +++
 tests/boot-serial-test.c   |   3 +-
 12 files changed, 743 insertions(+), 293 deletions(-)

-- 
2.21.0

[Qemu-devel] [PATCH v3 02/18] tests/boot-serial-test: add support for all the PowerNV machines

Use the machine names specifying the CPU type, POWER8 and POWER9.

Signed-off-by: Cédric Le Goater 
---
 tests/boot-serial-test.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c
index 24852d4c7d0b..a54d007298f7 100644
--- a/tests/boot-serial-test.c
+++ b/tests/boot-serial-test.c
@@ -103,7 +103,8 @@ static testdef_t tests[] = {
 { "ppc64", "pseries",
   "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken",
   "Open Firmware" },
-{ "ppc64", "powernv", "-cpu POWER8", "OPAL" },
+{ "ppc64", "powernv8", "", "OPAL" },
+{ "ppc64", "powernv9", "", "OPAL" },
 { "ppc64", "sam460ex", "-device e1000", "8086  100e" },
 { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" },
 { "i386", "pc", "-device sga", "SGABIOS" },
-- 
2.21.0

[Qemu-devel] [PATCH v3 01/18] ppc/pnv: Introduce PowerNV machines with fixed CPU models

Make the current "powernv" machine an abstract type and derive from it
new machines with specific CPU models: power8 and power9.

The "powernv" machine is now an alias on the "powernv9" machine.

Signed-off-by: Cédric Le Goater 
---
 hw/ppc/pnv.c | 70 ++--
 1 file changed, 63 insertions(+), 7 deletions(-)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 4570ce8afe6a..18602b9e9bcd 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -602,9 +602,20 @@ static void pnv_chip_power9_pic_print_info(PnvChip *chip, 
Monitor *mon)
 pnv_psi_pic_print_info(&chip9->psi, mon);
 }
 
+static bool pnv_match_cpu(const char *default_type, const char *cpu_type)
+{
+PowerPCCPUClass *ppc_default =
+POWERPC_CPU_CLASS(object_class_by_name(default_type));
+PowerPCCPUClass *ppc =
+POWERPC_CPU_CLASS(object_class_by_name(cpu_type));
+
+return ppc_default->pvr_match(ppc_default, ppc->pvr);
+}
+
 static void pnv_init(MachineState *machine)
 {
 PnvMachineState *pnv = PNV_MACHINE(machine);
+MachineClass *mc = MACHINE_GET_CLASS(machine);
 MemoryRegion *ram;
 char *fw_filename;
 long fw_size;
@@ -664,13 +675,23 @@ static void pnv_init(MachineState *machine)
 }
 }
 
+/*
+ * Check compatibility of the specified CPU with the machine
+ * default.
+ */
+if (!pnv_match_cpu(mc->default_cpu_type, machine->cpu_type)) {
+error_report("invalid CPU model '%s' for %s machine",
+ machine->cpu_type, mc->name);
+exit(1);
+}
+
 /* Create the processor chips */
 i = strlen(machine->cpu_type) - strlen(POWERPC_CPU_TYPE_SUFFIX);
 chip_typename = g_strdup_printf(PNV_CHIP_TYPE_NAME("%.*s"),
 i, machine->cpu_type);
 if (!object_class_by_name(chip_typename)) {
-error_report("invalid CPU model '%.*s' for %s machine",
- i, machine->cpu_type, MACHINE_GET_CLASS(machine)->name);
+error_report("invalid chip model '%.*s' for %s machine",
+ i, machine->cpu_type, mc->name);
 exit(1);
 }
 
@@ -1348,25 +1369,43 @@ static void pnv_machine_class_props_init(ObjectClass 
*oc)
   NULL);
 }
 
-static void pnv_machine_class_init(ObjectClass *oc, void *data)
+static void pnv_machine_power8_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
 XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
+
+mc->desc = "IBM PowerNV (Non-Virtualized) POWER8";
+mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
+
+xic->icp_get = pnv_icp_get;
+xic->ics_get = pnv_ics_get;
+xic->ics_resend = pnv_ics_resend;
+}
+
+static void pnv_machine_power9_class_init(ObjectClass *oc, void *data)
+{
+MachineClass *mc = MACHINE_CLASS(oc);
+
+mc->desc = "IBM PowerNV (Non-Virtualized) POWER9";
+mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0");
+
+mc->alias = "powernv";
+}
+
+static void pnv_machine_class_init(ObjectClass *oc, void *data)
+{
+MachineClass *mc = MACHINE_CLASS(oc);
 InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
 
 mc->desc = "IBM PowerNV (Non-Virtualized)";
 mc->init = pnv_init;
 mc->reset = pnv_reset;
 mc->max_cpus = MAX_CPUS;
-mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
 mc->block_default_type = IF_IDE; /* Pnv provides a AHCI device for
   * storage */
 mc->no_parallel = 1;
 mc->default_boot_order = NULL;
 mc->default_ram_size = 2 * GiB;
-xic->icp_get = pnv_icp_get;
-xic->ics_get = pnv_ics_get;
-xic->ics_resend = pnv_ics_resend;
 ispc->print_info = pnv_pic_print_info;
 
 pnv_machine_class_props_init(oc);
@@ -1386,10 +1425,27 @@ static void pnv_machine_class_init(ObjectClass *oc, 
void *data)
 .parent= TYPE_PNV9_CHIP,  \
 }
 
+#define DEFINE_PNV_MACHINE_TYPE(cpu, class_initfn)  \
+{   \
+.name  = MACHINE_TYPE_NAME(cpu),\
+.parent= TYPE_PNV_MACHINE,  \
+.instance_size = sizeof(PnvMachineState),   \
+.instance_init = pnv_machine_instance_init, \
+.class_init= class_initfn,  \
+.interfaces = (InterfaceInfo[]) {   \
+{ TYPE_XICS_FABRIC },   \
+{ TYPE_INTERRUPT_STATS_PROVIDER },  \
+{ },\
+},  \
+}
+
 static const TypeInfo types[] = {
+DEFINE_PNV_MACHINE_TYPE("powernv8", pnv_machine_power8_class_init),
+DEFINE_PNV_MACHINE_TYPE("powernv9", pnv_machine_power9_class_init),
 {
 .name  = TYPE_PNV_MACHINE,
 .parent= TYPE_MACHINE,
+.abstract   = true,
 .i

[Qemu-devel] [PATCH v3 08/18] ppc/pnv: Clarify how the TIMA is accessed on a multichip system

The TIMA MMIO space is shared among the chips and to identify the chip
from which the access is being done, the PowerBUS uses a chip field in
the load/store messages. QEMU does not model these messages, so
instead, we extract the chip id from the CPU PIR and do a lookup at
the machine level to fetch the targeted interrupt controller.

Introduce pnv_get_chip() and pnv_xive_tm_get_xive() helpers to clarify
this process in pnv_xive_get_tctx(). The latter will be removed in the
subsequent patches but the same principle will be kept.

Signed-off-by: Cédric Le Goater 
---
 include/hw/ppc/pnv.h | 13 +
 hw/intc/pnv_xive.c   | 40 +++-
 2 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index fb123edc4e5a..72b5cbb606b5 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -182,6 +182,19 @@ static inline bool pnv_is_power9(PnvMachineState *pnv)
 return pnv_chip_is_power9(pnv->chips[0]);
 }
 
+static inline PnvChip *pnv_get_chip(PnvMachineState *pnv, uint32_t chip_id)
+{
+int i;
+
+for (i = 0; i < pnv->num_chips; i++) {
+PnvChip *chip = pnv->chips[i];
+if (chip->chip_id == chip_id) {
+return chip;
+}
+}
+return NULL;
+}
+
 #define PNV_FDT_ADDR  0x0100
 #define PNV_TIMEBASE_FREQ 51200ULL
 
diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c
index 6f53a2abbb6a..e598857359d8 100644
--- a/hw/intc/pnv_xive.c
+++ b/hw/intc/pnv_xive.c
@@ -464,31 +464,37 @@ static int pnv_xive_match_nvt(XivePresenter *xptr, 
uint8_t format,
 return count;
 }
 
+/*
+ * The TIMA MMIO space is shared among the chips and to identify the
+ * chip from which the access is being done, we extract the chip id
+ * from the PIR.
+ */
+static PnvXive *pnv_xive_tm_get_xive(PowerPCCPU *cpu)
+{
+PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
+PnvChip *chip;
+PnvXive *xive;
+
+chip = pnv_get_chip(pnv, cpu_chip_id(cpu));
+assert(chip);
+xive = &PNV9_CHIP(chip)->xive;
+
+if (!pnv_xive_is_cpu_enabled(xive, cpu)) {
+xive_error(xive, "IC: CPU %x is not enabled", cpu_pir(cpu));
+}
+return xive;
+}
+
 static XiveTCTX *pnv_xive_get_tctx(XiveRouter *xrtr, CPUState *cs)
 {
 PowerPCCPU *cpu = POWERPC_CPU(cs);
-XiveTCTX *tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc);
-PnvXive *xive = NULL;
-CPUPPCState *env = &cpu->env;
-int pir = env->spr_cb[SPR_PIR].default_value;
+PnvXive *xive = pnv_xive_tm_get_xive(cpu);
 
-/*
- * Perform an extra check on the HW thread enablement.
- *
- * The TIMA is shared among the chips and to identify the chip
- * from which the access is being done, we extract the chip id
- * from the PIR.
- */
-xive = pnv_xive_get_ic((pir >> 8) & 0xf);
 if (!xive) {
 return NULL;
 }
 
-if (!(xive->regs[PC_THREAD_EN_REG0 >> 3] & PPC_BIT(pir & 0x3f))) {
-xive_error(PNV_XIVE(xrtr), "IC: CPU %x is not enabled", pir);
-}
-
-return tctx;
+return XIVE_TCTX(pnv_cpu_state(cpu)->intc);
 }
 
 /*
-- 
2.21.0

[Qemu-devel] [PATCH v3 05/18] ppc/spapr: Implement the XiveFabric and XivePresenter interfaces

The CAM line matching sequence in the pseries machine does not change
much apart from the use of the new QOM interfaces.

Fixes: af53dbf6227a ("ppc/xive: introduce a simplified XIVE presenter")
Signed-off-by: Cédric Le Goater 
---
 include/hw/ppc/spapr_irq.h |  6 ++
 hw/intc/spapr_xive.c   | 41 ++
 hw/ppc/spapr.c | 34 +++
 hw/ppc/spapr_irq.c | 25 +++
 4 files changed, 106 insertions(+)

diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
index f965a58f8954..8c99b0680f97 100644
--- a/include/hw/ppc/spapr_irq.h
+++ b/include/hw/ppc/spapr_irq.h
@@ -30,6 +30,8 @@ int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t 
num, bool align,
 void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num);
 void spapr_irq_msi_reset(SpaprMachineState *spapr);
 
+struct XiveTCTXMatch;
+
 typedef struct SpaprIrq {
 uint32_tnr_irqs;
 uint32_tnr_msis;
@@ -49,6 +51,10 @@ typedef struct SpaprIrq {
 void (*set_irq)(void *opaque, int srcno, int val);
 const char *(*get_nodename)(SpaprMachineState *spapr);
 void (*init_kvm)(SpaprMachineState *spapr, Error **errp);
+int (*match_nvt)(SpaprMachineState *spapr, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv, struct XiveTCTXMatch *match);
 } SpaprIrq;
 
 extern SpaprIrq spapr_irq_xics;
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index ba012c7b0fdc..beb5049ad9da 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -419,6 +419,44 @@ static XiveTCTX *spapr_xive_get_tctx(XiveRouter *xrtr, 
CPUState *cs)
 return spapr_cpu_state(cpu)->tctx;
 }
 
+static int spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format,
+uint8_t nvt_blk, uint32_t nvt_idx,
+bool cam_ignore, uint8_t priority,
+uint32_t logic_serv, XiveTCTXMatch *match)
+{
+CPUState *cs;
+int count = 0;
+
+CPU_FOREACH(cs) {
+PowerPCCPU *cpu = POWERPC_CPU(cs);
+XiveTCTX *tctx = spapr_cpu_state(cpu)->tctx;
+int ring;
+
+/*
+ * Check the thread context CAM lines and record matches.
+ */
+ring = xive_presenter_tctx_match(xptr, tctx, format, nvt_blk, nvt_idx,
+ cam_ignore, logic_serv);
+/*
+ * Save the matching thread interrupt context and follow on to
+ * check for duplicates which are invalid.
+ */
+if (ring != -1) {
+if (match->tctx) {
+qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread "
+  "context NVT %x/%x\n", nvt_blk, nvt_idx);
+return -1;
+}
+
+match->ring = ring;
+match->tctx = tctx;
+count++;
+}
+}
+
+return count;
+}
+
 static const VMStateDescription vmstate_spapr_xive_end = {
 .name = TYPE_SPAPR_XIVE "/end",
 .version_id = 1,
@@ -496,6 +534,7 @@ static void spapr_xive_class_init(ObjectClass *klass, void 
*data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
 XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass);
+XivePresenterClass *xpc = XIVE_PRESENTER_CLASS(klass);
 
 dc->desc= "sPAPR XIVE Interrupt Controller";
 dc->props   = spapr_xive_properties;
@@ -508,6 +547,8 @@ static void spapr_xive_class_init(ObjectClass *klass, void 
*data)
 xrc->get_nvt = spapr_xive_get_nvt;
 xrc->write_nvt = spapr_xive_write_nvt;
 xrc->get_tctx = spapr_xive_get_tctx;
+
+xpc->match_nvt  = spapr_xive_match_nvt;
 }
 
 static const TypeInfo spapr_xive_info = {
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 115bbfb0e788..631db719f41f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4302,6 +4302,37 @@ static void spapr_pic_print_info(InterruptStatsProvider 
*obj,
 spapr->irq->print_info(spapr, mon);
 }
 
+static int spapr_xive_match_nvt(XiveFabric *xfb, uint8_t format,
+uint8_t nvt_blk, uint32_t nvt_idx,
+bool cam_ignore, uint8_t priority,
+uint32_t logic_serv, XiveTCTXMatch *match)
+{
+SpaprMachineState *spapr = SPAPR_MACHINE(xfb);
+int count;
+
+count = spapr->irq->match_nvt(spapr, format, nvt_blk, nvt_idx, cam_ignore,
+  priority, logic_serv, match);
+if (count < 0) {
+return count;
+}
+
+/*
+ * When we implement the save and restore of the thread interrupt
+ * contexts in the enter/exit CPU handlers of the machine and the
+ * escalations in QEMU, we should be able to handle non dispatched
+ * vCPUs.
+ *
+ * Until this is done, the sPAPR machine should find at least one
+ * matching context always.
+ *

[Qemu-devel] [PATCH v3 07/18] ppc/xive: Extend the TIMA operation with a XivePresenter parameter

The TIMA operations are performed on behalf of the XIVE IVPE
sub-engine (Presenter) on the thread interrupt context registers. The
current operations the model supports are simple and do not require
access to the controller but more complex operations we will add will
need access to the controller NVT table and to its configuration.

Signed-off-by: Cédric Le Goater 
---
 include/hw/ppc/xive.h |  7 +++---
 hw/intc/pnv_xive.c|  4 +--
 hw/intc/xive.c| 58 ---
 3 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 165134ce52a5..ba43a4a129d9 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -464,9 +464,10 @@ typedef struct XiveENDSource {
 #define XIVE_TM_USER_PAGE   0x3
 
 extern const MemoryRegionOps xive_tm_ops;
-void xive_tctx_tm_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
-unsigned size);
-uint64_t xive_tctx_tm_read(XiveTCTX *tctx, hwaddr offset, unsigned size);
+void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
+uint64_t value, unsigned size);
+uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
+   unsigned size);
 
 void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon);
 Object *xive_tctx_create(Object *cpu, XiveRouter *xrtr, Error **errp);
diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c
index 183798b81496..6f53a2abbb6a 100644
--- a/hw/intc/pnv_xive.c
+++ b/hw/intc/pnv_xive.c
@@ -1444,7 +1444,7 @@ static void xive_tm_indirect_write(void *opaque, hwaddr 
offset,
 {
 XiveTCTX *tctx = pnv_xive_get_indirect_tctx(PNV_XIVE(opaque));
 
-xive_tctx_tm_write(tctx, offset, value, size);
+xive_tctx_tm_write(XIVE_PRESENTER(opaque), tctx, offset, value, size);
 }
 
 static uint64_t xive_tm_indirect_read(void *opaque, hwaddr offset,
@@ -1452,7 +1452,7 @@ static uint64_t xive_tm_indirect_read(void *opaque, 
hwaddr offset,
 {
 XiveTCTX *tctx = pnv_xive_get_indirect_tctx(PNV_XIVE(opaque));
 
-return xive_tctx_tm_read(tctx, offset, size);
+return xive_tctx_tm_read(XIVE_PRESENTER(opaque), tctx, offset, size);
 }
 
 static const MemoryRegionOps xive_tm_indirect_ops = {
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index bec0c878705c..9ca015969143 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -141,19 +141,20 @@ static inline uint32_t xive_tctx_word2(uint8_t *ring)
  * XIVE Thread Interrupt Management Area (TIMA)
  */
 
-static void xive_tm_set_hv_cppr(XiveTCTX *tctx, hwaddr offset,
-uint64_t value, unsigned size)
+static void xive_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx,
+hwaddr offset, uint64_t value, unsigned size)
 {
 xive_tctx_set_cppr(tctx, TM_QW3_HV_PHYS, value & 0xff);
 }
 
-static uint64_t xive_tm_ack_hv_reg(XiveTCTX *tctx, hwaddr offset, unsigned 
size)
+static uint64_t xive_tm_ack_hv_reg(XivePresenter *xptr, XiveTCTX *tctx,
+   hwaddr offset, unsigned size)
 {
 return xive_tctx_accept(tctx, TM_QW3_HV_PHYS);
 }
 
-static uint64_t xive_tm_pull_pool_ctx(XiveTCTX *tctx, hwaddr offset,
-  unsigned size)
+static uint64_t xive_tm_pull_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx,
+  hwaddr offset, unsigned size)
 {
 uint32_t qw2w2_prev = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
 uint32_t qw2w2;
@@ -163,13 +164,14 @@ static uint64_t xive_tm_pull_pool_ctx(XiveTCTX *tctx, 
hwaddr offset,
 return qw2w2;
 }
 
-static void xive_tm_vt_push(XiveTCTX *tctx, hwaddr offset,
+static void xive_tm_vt_push(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
 uint64_t value, unsigned size)
 {
 tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = value & 0xff;
 }
 
-static uint64_t xive_tm_vt_poll(XiveTCTX *tctx, hwaddr offset, unsigned size)
+static uint64_t xive_tm_vt_poll(XivePresenter *xptr, XiveTCTX *tctx,
+hwaddr offset, unsigned size)
 {
 return tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] & 0xff;
 }
@@ -312,13 +314,14 @@ static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr 
offset, unsigned size)
  * state changes (side effects) in addition to setting/returning the
  * interrupt management area context of the processor thread.
  */
-static uint64_t xive_tm_ack_os_reg(XiveTCTX *tctx, hwaddr offset, unsigned 
size)
+static uint64_t xive_tm_ack_os_reg(XivePresenter *xptr, XiveTCTX *tctx,
+   hwaddr offset, unsigned size)
 {
 return xive_tctx_accept(tctx, TM_QW1_OS);
 }
 
-static void xive_tm_set_os_cppr(XiveTCTX *tctx, hwaddr offset,
-uint64_t value, unsigned size)
+static void xive_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx,
+hwaddr offset, uint64_t value, unsigned size)
 {

[Qemu-devel] [PATCH v3 03/18] ppc/xive: Introduce the XiveFabric and XivePresenter interfaces

When the XIVE IVRE sub-engine (XiveRouter) looks for a Notification
Virtual Target (NVT) to notify, it broadcasts a message on the
PowerBUS to find an XIVE IVPE sub-engine (Presenter) which has the NVT
dispatched on one of its HW threads, and then forwards the
notification if any response was received.

The current XIVE presenter model is sufficient for the pseries machine
because it has a single interrupt controller device, but the PowerNV
machine can have multiple chips each having its own interrupt
controller. In this case, the XIVE presenter model is too simple and
the CAM line matching should scan all chips of the system.

We introduce a XiveFabric QOM interface which needs to be implemented
by the machine. It acts as the PowerBUS interface between the
interrupt controller and the system. On HW, the XIVE sub-engine
responsible for the communication with the other chip is the Common
Queue (CQ) bridge unit. This interface offers a 'match_nvt' handler to
perform the CAM line matching when looking for a XIVE Presenter with a
dispatched NVT.

We also introduce a XivePresenter QOM interface to represent the XIVE
Presenter (PC) sub-engine of the XIVE controller with a similar
'match_nvt' handler to perform the CAM line matching. This interface
could be merged in the XiveRouter but as it is stateless and performs
the XIVE IVPE sub-engine tasks, it makes sense to keep it independent.
It should have its use in POWER10.

Fixes: af53dbf6227a ("ppc/xive: introduce a simplified XIVE presenter")
Signed-off-by: Cédric Le Goater 
---
 include/hw/ppc/xive.h | 54 +++
 hw/intc/xive.c| 36 +
 2 files changed, 81 insertions(+), 9 deletions(-)

diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 4851ff87e795..165134ce52a5 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -368,6 +368,60 @@ int xive_router_write_nvt(XiveRouter *xrtr, uint8_t 
nvt_blk, uint32_t nvt_idx,
 XiveTCTX *xive_router_get_tctx(XiveRouter *xrtr, CPUState *cs);
 void xive_router_notify(XiveNotifier *xn, uint32_t lisn);
 
+/*
+ * XIVE Presenter
+ */
+
+typedef struct XiveTCTXMatch {
+XiveTCTX *tctx;
+uint8_t ring;
+} XiveTCTXMatch;
+
+typedef struct XivePresenter XivePresenter;
+
+#define TYPE_XIVE_PRESENTER "xive-presenter"
+#define XIVE_PRESENTER(obj) \
+INTERFACE_CHECK(XivePresenter, (obj), TYPE_XIVE_PRESENTER)
+#define XIVE_PRESENTER_CLASS(klass) \
+OBJECT_CLASS_CHECK(XivePresenterClass, (klass), TYPE_XIVE_PRESENTER)
+#define XIVE_PRESENTER_GET_CLASS(obj)   \
+OBJECT_GET_CLASS(XivePresenterClass, (obj), TYPE_XIVE_PRESENTER)
+
+typedef struct XivePresenterClass {
+InterfaceClass parent;
+int (*match_nvt)(XivePresenter *xptr, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv, XiveTCTXMatch *match);
+} XivePresenterClass;
+
+int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
+  uint8_t format,
+  uint8_t nvt_blk, uint32_t nvt_idx,
+  bool cam_ignore, uint32_t logic_serv);
+
+/*
+ * XIVE Fabric (Interface between Interrupt Controller and Machine)
+ */
+
+typedef struct XiveFabric XiveFabric;
+
+#define TYPE_XIVE_FABRIC "xive-fabric"
+#define XIVE_FABRIC(obj) \
+INTERFACE_CHECK(XiveFabric, (obj), TYPE_XIVE_FABRIC)
+#define XIVE_FABRIC_CLASS(klass) \
+OBJECT_CLASS_CHECK(XiveFabricClass, (klass), TYPE_XIVE_FABRIC)
+#define XIVE_FABRIC_GET_CLASS(obj)   \
+OBJECT_GET_CLASS(XiveFabricClass, (obj), TYPE_XIVE_FABRIC)
+
+typedef struct XiveFabricClass {
+InterfaceClass parent;
+int (*match_nvt)(XiveFabric *xfb, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv, XiveTCTXMatch *match);
+} XiveFabricClass;
+
 /*
  * XIVE END ESBs
  */
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 88f2e560db0f..4bdedab13047 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -1294,9 +1294,10 @@ static uint32_t xive_tctx_hw_cam_line(XiveTCTX *tctx)
 /*
  * The thread context register words are in big-endian format.
  */
-static int xive_presenter_tctx_match(XiveTCTX *tctx, uint8_t format,
- uint8_t nvt_blk, uint32_t nvt_idx,
- bool cam_ignore, uint32_t logic_serv)
+int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
+  uint8_t format,
+  uint8_t nvt_blk, uint32_t nvt_idx,
+  bool cam_ignore, uint32_t logic_serv)
 {
 ui

[Qemu-devel] [PATCH v3 10/18] ppc/xive: Introduce a xive_tctx_ipb_update() helper

We will use it to resend missed interrupts when a vCPU context is
pushed on a HW thread.

Signed-off-by: Cédric Le Goater 
---
 include/hw/ppc/xive.h |  1 +
 hw/intc/xive.c| 15 +++
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index b34e2ad43a82..4233773bae11 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -468,6 +468,7 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX 
*tctx, hwaddr offset,
 
 void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon);
 Object *xive_tctx_create(Object *cpu, XiveRouter *xrtr, Error **errp);
+void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb);
 
 static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx)
 {
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 6f74b4dcbbd4..8f6a29ff1f47 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -132,6 +132,15 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t 
ring, uint8_t cppr)
 xive_tctx_notify(tctx, ring);
 }
 
+void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb)
+{
+uint8_t *regs = &tctx->regs[ring];
+
+regs[TM_IPB] |= ipb;
+regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
+xive_tctx_notify(tctx, ring);
+}
+
 static inline uint32_t xive_tctx_word2(uint8_t *ring)
 {
 return *((uint32_t *) &ring[TM_WORD2]);
@@ -333,8 +342,7 @@ static void xive_tm_set_os_cppr(XivePresenter *xptr, 
XiveTCTX *tctx,
 static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx,
hwaddr offset, uint64_t value, unsigned 
size)
 {
-ipb_update(&tctx->regs[TM_QW1_OS], value & 0xff);
-xive_tctx_notify(tctx, TM_QW1_OS);
+xive_tctx_ipb_update(tctx, TM_QW1_OS, priority_to_ipb(value & 0xff));
 }
 
 static uint64_t xive_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
@@ -1360,8 +1368,7 @@ static bool xive_presenter_notify(uint8_t format,
 
 /* handle CPU exception delivery */
 if (count) {
-ipb_update(&match.tctx->regs[match.ring], priority);
-xive_tctx_notify(match.tctx, match.ring);
+xive_tctx_ipb_update(match.tctx, match.ring, 
priority_to_ipb(priority));
 }
 
 return count;
-- 
2.21.0

[Qemu-devel] [PATCH v3 06/18] ppc/xive: Use the XiveFabric and XivePresenter interfaces

Now that the machines have handlers implemented for the XiveFabric and
XivePresenter interfaces, remove xive_presenter_match() and make use of
the 'match_nvt' handler of the machine.

Fixes: af53dbf6227a ("ppc/xive: introduce a simplified XIVE presenter")
Signed-off-by: Cédric Le Goater 
---
 hw/intc/xive.c | 77 +++---
 1 file changed, 17 insertions(+), 60 deletions(-)

diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 4bdedab13047..bec0c878705c 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -1354,59 +1354,6 @@ int xive_presenter_tctx_match(XivePresenter *xptr, 
XiveTCTX *tctx,
 return -1;
 }
 
-static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format,
- uint8_t nvt_blk, uint32_t nvt_idx,
- bool cam_ignore, uint8_t priority,
- uint32_t logic_serv, XiveTCTXMatch *match)
-{
-CPUState *cs;
-
-/*
- * TODO (PowerNV): handle chip_id overwrite of block field for
- * hardwired CAM compares
- */
-
-CPU_FOREACH(cs) {
-XiveTCTX *tctx = xive_router_get_tctx(xrtr, cs);
-int ring;
-
-/*
- * HW checks that the CPU is enabled in the Physical Thread
- * Enable Register (PTER).
- */
-
-/*
- * Check the thread context CAM lines and record matches. We
- * will handle CPU exception delivery later
- */
-ring = xive_presenter_tctx_match(XIVE_PRESENTER(xrtr), tctx, format,
- nvt_blk, nvt_idx,
- cam_ignore, logic_serv);
-/*
- * Save the context and follow on to catch duplicates, that we
- * don't support yet.
- */
-if (ring != -1) {
-if (match->tctx) {
-qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread "
-  "context NVT %x/%x\n", nvt_blk, nvt_idx);
-return false;
-}
-
-match->ring = ring;
-match->tctx = tctx;
-}
-}
-
-if (!match->tctx) {
-qemu_log_mask(LOG_UNIMP, "XIVE: NVT %x/%x is not dispatched\n",
-  nvt_blk, nvt_idx);
-return false;
-}
-
-return true;
-}
-
 /*
  * This is our simple Xive Presenter Engine model. It is merged in the
  * Router as it does not require an extra object.
@@ -1422,22 +1369,32 @@ static bool xive_presenter_match(XiveRouter *xrtr, 
uint8_t format,
  *
  * The parameters represent what is sent on the PowerBus
  */
-static bool xive_presenter_notify(XiveRouter *xrtr, uint8_t format,
+static bool xive_presenter_notify(uint8_t format,
   uint8_t nvt_blk, uint32_t nvt_idx,
   bool cam_ignore, uint8_t priority,
   uint32_t logic_serv)
 {
+XiveFabric *xfb = XIVE_FABRIC(qdev_get_machine());
+XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb);
 XiveTCTXMatch match = { .tctx = NULL, .ring = 0 };
-bool found;
+int count;
 
-found = xive_presenter_match(xrtr, format, nvt_blk, nvt_idx, cam_ignore,
- priority, logic_serv, &match);
-if (found) {
+/*
+ * Ask the machine to scan the interrupt controllers for a match
+ */
+count = xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, cam_ignore,
+   priority, logic_serv, &match);
+if (count < 0) {
+return false;
+}
+
+/* handle CPU exception delivery */
+if (count) {
 ipb_update(&match.tctx->regs[match.ring], priority);
 xive_tctx_notify(match.tctx, match.ring);
 }
 
-return found;
+return count;
 }
 
 /*
@@ -1550,7 +1507,7 @@ static void xive_router_end_notify(XiveRouter *xrtr, 
uint8_t end_blk,
 return;
 }
 
-found = xive_presenter_notify(xrtr, format, nvt_blk, nvt_idx,
+found = xive_presenter_notify(format, nvt_blk, nvt_idx,
   xive_get_field32(END_W7_F0_IGNORE, end.w7),
   priority,
   xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7));
-- 
2.21.0

[Qemu-devel] [PATCH v3 04/18] ppc/pnv: Implement the XiveFabric and XivePresenter interfaces

The CAM line matching on the PowerNV machine now scans all chips of
the system and all CPUs of a chip to find a dispatched NVT in the
thread contexts.

As there is no easy way to loop on the CPUs belonging to a chip, the
PowerNV handler loops on all CPUs and filters out the external CPUs.

Fixes: af53dbf6227a ("ppc/xive: introduce a simplified XIVE presenter")
Signed-off-by: Cédric Le Goater 
---
 hw/intc/pnv_xive.c | 76 ++
 hw/ppc/pnv.c   | 32 +++
 2 files changed, 108 insertions(+)

diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c
index ff1226485983..183798b81496 100644
--- a/hw/intc/pnv_xive.c
+++ b/hw/intc/pnv_xive.c
@@ -390,6 +390,80 @@ static int pnv_xive_get_eas(XiveRouter *xrtr, uint8_t blk, uint32_t idx,
 return pnv_xive_vst_read(xive, VST_TSEL_IVT, blk, idx, eas);
 }
 
+static int cpu_pir(PowerPCCPU *cpu)
+{
+CPUPPCState *env = &cpu->env;
+return env->spr_cb[SPR_PIR].default_value;
+}
+
+static int cpu_chip_id(PowerPCCPU *cpu)
+{
+int pir = cpu_pir(cpu);
+return (pir >> 8) & 0x7f;
+}
+
+static bool pnv_xive_is_cpu_enabled(PnvXive *xive, PowerPCCPU *cpu)
+{
+int pir = cpu_pir(cpu);
+int thrd_id = pir & 0x7f;
+
+return xive->regs[PC_THREAD_EN_REG0 >> 3] & PPC_BIT(thrd_id);
+}
+
+static bool pnv_xive_is_ignored(PnvChip *chip, CPUState *cs)
+{
+return chip->chip_id != cpu_chip_id(POWERPC_CPU(cs));
+}
+
+#define PNV_CHIP_CPU_FOREACH(chip, cs)  \
+CPU_FOREACH(cs) \
+if (pnv_xive_is_ignored(chip, cs)) {} else
+
+static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format,
+  uint8_t nvt_blk, uint32_t nvt_idx,
+  bool cam_ignore, uint8_t priority,
+  uint32_t logic_serv, XiveTCTXMatch *match)
+{
+PnvXive *xive = PNV_XIVE(xptr);
+CPUState *cs;
+int count = 0;
+
+/*
+ * Loop on all CPUs of the machine and filter out the CPUs
+ * belonging to another chip.
+ */
+PNV_CHIP_CPU_FOREACH(xive->chip, cs) {
+PowerPCCPU *cpu = POWERPC_CPU(cs);
+XiveTCTX *tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc);
+int ring;
+
+if (!pnv_xive_is_cpu_enabled(xive, cpu)) {
+continue;
+}
+
+ring = xive_presenter_tctx_match(xptr, tctx, format, nvt_blk, nvt_idx,
+ cam_ignore, logic_serv);
+/*
+ * Save the context and follow on to catch duplicates, that we
+ * don't support yet.
+ */
+if (ring != -1) {
+if (match->tctx) {
+qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a "
+  "thread context NVT %x/%x\n",
+  nvt_blk, nvt_idx);
+return -1;
+}
+
+match->ring = ring;
+match->tctx = tctx;
+count++;
+}
+}
+
+return count;
+}
+
 static XiveTCTX *pnv_xive_get_tctx(XiveRouter *xrtr, CPUState *cs)
 {
 PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -1795,6 +1869,7 @@ static void pnv_xive_class_init(ObjectClass *klass, void *data)
 PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass);
 XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass);
 XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass);
+XivePresenterClass *xpc = XIVE_PRESENTER_CLASS(klass);
 
 xdc->dt_xscom = pnv_xive_dt_xscom;
 
@@ -1810,6 +1885,7 @@ static void pnv_xive_class_init(ObjectClass *klass, void *data)
 xrc->get_tctx = pnv_xive_get_tctx;
 
 xnc->notify = pnv_xive_notify;
+xpc->match_nvt  = pnv_xive_match_nvt;
 };
 
 static const TypeInfo pnv_xive_info = {
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 18602b9e9bcd..3f6796831b68 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1322,6 +1322,35 @@ static void pnv_pic_print_info(InterruptStatsProvider *obj,
 }
 }
 
+static int pnv_xive_match_nvt(XiveFabric *xfb, uint8_t format,
+   uint8_t nvt_blk, uint32_t nvt_idx,
+   bool cam_ignore, uint8_t priority,
+   uint32_t logic_serv,
+   XiveTCTXMatch *match)
+{
+PnvMachineState *pnv = PNV_MACHINE(xfb);
+int total_count = 0;
+int i;
+
+for (i = 0; i < pnv->num_chips; i++) {
+Pnv9Chip *chip9 = PNV9_CHIP(pnv->chips[i]);
+XivePresenter *xptr = XIVE_PRESENTER(&chip9->xive);
+XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
+int count;
+
+count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, cam_ignore,
+   priority, logic_serv, match);
+
+if (count < 0) {
+return count;
+}
+
+total_count += count;
+}
+
+return total_count;
+}
+
 static void pnv_get_num_chips(Object *obj, Visitor *v, const ch

[Qemu-devel] [PATCH v3 13/18] ppc/pnv: Dump the XIVE NVT table

This is to track the configuration of the base END index of the vCPU
and the Interrupt Pending Buffer. The NVT IPB is updated when an
interrupt cannot be presented to a vCPU.

Signed-off-by: Cédric Le Goater 
---
 include/hw/ppc/xive_regs.h |  2 ++
 hw/intc/pnv_xive.c | 22 ++
 2 files changed, 24 insertions(+)

diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
index 92ff80d25456..b228ab0ba3ea 100644
--- a/include/hw/ppc/xive_regs.h
+++ b/include/hw/ppc/xive_regs.h
@@ -229,6 +229,8 @@ typedef struct XiveNVT {
 uint32_tw0;
 #define NVT_W0_VALID PPC_BIT32(0)
 uint32_tw1;
+#define NVT_W1_EQ_BLOCK  PPC_BITMASK32(0, 3)
+#define NVT_W1_EQ_INDEX  PPC_BITMASK32(4, 31)
 uint32_tw2;
 uint32_tw3;
 uint32_tw4;
diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c
index aba7a3f83dab..849291ee683e 100644
--- a/hw/intc/pnv_xive.c
+++ b/hw/intc/pnv_xive.c
@@ -1641,6 +1641,21 @@ static const MemoryRegionOps pnv_xive_pc_ops = {
 },
 };
 
+static void xive_nvt_pic_print_info(XiveNVT *nvt, uint32_t nvt_idx,
+Monitor *mon)
+{
+uint8_t  eq_blk = xive_get_field32(NVT_W1_EQ_BLOCK, nvt->w1);
+uint32_t eq_idx = xive_get_field32(NVT_W1_EQ_INDEX, nvt->w1);
+
+if (!xive_nvt_is_valid(nvt)) {
+return;
+}
+
+monitor_printf(mon, "  %08x end:%02x/%04x IPB:%02x\n", nvt_idx,
+   eq_blk, eq_idx,
+   xive_get_field32(NVT_W4_IPB, nvt->w4));
+}
+
 void pnv_xive_pic_print_info(PnvXive *xive, Monitor *mon)
 {
 XiveRouter *xrtr = XIVE_ROUTER(xive);
@@ -1649,6 +1664,7 @@ void pnv_xive_pic_print_info(PnvXive *xive, Monitor *mon)
 uint32_t nr_ipis = pnv_xive_nr_ipis(xive, blk);
 XiveEAS eas;
 XiveEND end;
+XiveNVT nvt;
 int i;
 
 monitor_printf(mon, "XIVE[%x] Source %08x .. %08x\n", blk, srcno0,
@@ -1677,6 +1693,12 @@ void pnv_xive_pic_print_info(PnvXive *xive, Monitor *mon)
 while (!xive_router_get_end(xrtr, blk, i, &end)) {
 xive_end_eas_pic_print_info(&end, i++, mon);
 }
+
+monitor_printf(mon, "XIVE[%x] NVTT\n", blk);
+i = 0;
+while (!xive_router_get_nvt(xrtr, blk, i, &nvt)) {
+xive_nvt_pic_print_info(&nvt, i++, mon);
+}
 }
 
 static void pnv_xive_reset(void *dev)
-- 
2.21.0

[Qemu-devel] [PATCH v3 09/18] ppc/xive: Move the TIMA operations to the controller model