Re: [EXTERNAL] Re: [PATCH] WHPX: Assigning maintainer for Windows Hypervisor Platform

2020-02-20 Thread Stefan Weil
Am 19.02.20 um 16:50 schrieb Justin Terry (SF):

> Ha yes. LGTM Thanks!
>
> Reviewed-by: Justin Terry (VM) 
>
>> -Original Message-
>> From: Philippe Mathieu-Daudé 
>> Sent: Wednesday, February 19, 2020 12:32 AM
>> To: Justin Terry (SF) ; Sunil Muthuswamy
>> ; Eduardo Habkost ;
>> Paolo Bonzini ; Richard Henderson
>> 
>> Cc: Stefan Weil ; qemu-devel@nongnu.org
>> Subject: [EXTERNAL] Re: [PATCH] WHPX: Assigning maintainer for Windows
>> Hypervisor Platform
>>
>> Thank you Sunil!
>>
>> On 2/18/20 9:51 PM, Justin Terry (SF) wrote:
>>> Looks good to me! Thanks Sunil.
>>>
>>> Signed-off-by: Justin Terry (VM) 


Hello Justin, hello Sunil,

just a reminder: we still have the problem with the proprietary license
for the required Microsoft header files.

Can you estimate when this will be solved?

Regards,
Stefan





[PATCH v2 1/3] arm_gic: Mask the un-supported priority bits

2020-02-20 Thread Sai Pavan Boddu
The number of priority bits implemented in the Arm GIC can range from 4 to 8;
unimplemented bits read as zero (RAZ).

Signed-off-by: Sai Pavan Boddu 
---
 hw/intc/arm_gic.c| 26 --
 hw/intc/arm_gic_common.c |  1 +
 include/hw/intc/arm_gic_common.h |  1 +
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index 1d7da7b..dec8767 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -641,6 +641,23 @@ uint32_t gic_acknowledge_irq(GICState *s, int cpu, 
MemTxAttrs attrs)
 return ret;
 }
 
+static uint32_t gic_fullprio_mask(GICState *s, int cpu)
+{
+/*
+ * Return a mask word which clears the unimplemented priority
+ * bits from a priority value for an interrupt. (Not to be
+ * confused with the group priority, whose mask depends on BPR.)
+ */
+int unimpBits;
+
+if (gic_is_vcpu(cpu)) {
+unimpBits = GIC_VIRT_MAX_GROUP_PRIO_BITS;
+} else {
+unimpBits = 8 - s->n_prio_bits;
+}
+return ~0U << unimpBits;
+}
+
 void gic_dist_set_priority(GICState *s, int cpu, int irq, uint8_t val,
   MemTxAttrs attrs)
 {
@@ -669,7 +686,7 @@ static uint32_t gic_dist_get_priority(GICState *s, int cpu, 
int irq,
 }
 prio = (prio << 1) & 0xff; /* Non-secure view */
 }
-return prio;
+return prio & gic_fullprio_mask(s, cpu);
 }
 
 static void gic_set_priority_mask(GICState *s, int cpu, uint8_t pmask,
@@ -684,7 +701,7 @@ static void gic_set_priority_mask(GICState *s, int cpu, 
uint8_t pmask,
 return;
 }
 }
-s->priority_mask[cpu] = pmask;
+s->priority_mask[cpu] = pmask & gic_fullprio_mask(s, cpu);
 }
 
 static uint32_t gic_get_priority_mask(GICState *s, int cpu, MemTxAttrs attrs)
@@ -2055,6 +2072,11 @@ static void arm_gic_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
+if (s->n_prio_bits > 8) {
+error_setg(errp, "num-priority-bits cannot be greater than 8");
+return;
+}
+
 /* This creates distributor, main CPU interface (s->cpuiomem[0]) and if
  * enabled, virtualization extensions related interfaces (main virtual
  * interface (s->vifaceiomem[0]) and virtual CPU interface).
diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
index e6c4fe7..7b44d56 100644
--- a/hw/intc/arm_gic_common.c
+++ b/hw/intc/arm_gic_common.c
@@ -357,6 +357,7 @@ static Property arm_gic_common_properties[] = {
 DEFINE_PROP_BOOL("has-security-extensions", GICState, security_extn, 0),
 /* True if the GIC should implement the virtualization extensions */
 DEFINE_PROP_BOOL("has-virtualization-extensions", GICState, virt_extn, 0),
+DEFINE_PROP_UINT32("num-priority-bits", GICState, n_prio_bits, 8),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/intc/arm_gic_common.h b/include/hw/intc/arm_gic_common.h
index b5585fe..6e0d6b8 100644
--- a/include/hw/intc/arm_gic_common.h
+++ b/include/hw/intc/arm_gic_common.h
@@ -96,6 +96,7 @@ typedef struct GICState {
 uint16_t priority_mask[GIC_NCPU_VCPU];
 uint16_t running_priority[GIC_NCPU_VCPU];
 uint16_t current_pending[GIC_NCPU_VCPU];
+uint32_t n_prio_bits;
 
 /* If we present the GICv2 without security extensions to a guest,
  * the guest can configure the GICC_CTLR to configure group 1 binary point
-- 
2.7.4




[PATCH v2 3/3] cpu/arm11mpcore: Set number of GIC priority bits to 4

2020-02-20 Thread Sai Pavan Boddu
ARM11MPCore GIC is implemented with 4 priority bits.

Signed-off-by: Sai Pavan Boddu 
Suggested-by: Peter Maydell 
---
 hw/cpu/arm11mpcore.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/cpu/arm11mpcore.c b/hw/cpu/arm11mpcore.c
index 2e3e87c..ab9fadb 100644
--- a/hw/cpu/arm11mpcore.c
+++ b/hw/cpu/arm11mpcore.c
@@ -15,6 +15,7 @@
 #include "hw/irq.h"
 #include "hw/qdev-properties.h"
 
+#define ARM11MPCORE_NUM_GIC_PRIORITY_BITS4
 
 static void mpcore_priv_set_irq(void *opaque, int irq, int level)
 {
@@ -86,6 +87,10 @@ static void mpcore_priv_realize(DeviceState *dev, Error 
**errp)
 
 qdev_prop_set_uint32(gicdev, "num-cpu", s->num_cpu);
 qdev_prop_set_uint32(gicdev, "num-irq", s->num_irq);
+qdev_prop_set_uint32(gicdev, "num-priority-bits",
+ ARM11MPCORE_NUM_GIC_PRIORITY_BITS);
+
+
 object_property_set_bool(OBJECT(>gic), true, "realized", );
 if (err != NULL) {
 error_propagate(errp, err);
-- 
2.7.4




[PATCH v2 2/3] cpu/a9mpcore: Set number of GIC priority bits to 5

2020-02-20 Thread Sai Pavan Boddu
All A9 CPUs have a GIC with 5 bits of priority.

Signed-off-by: Sai Pavan Boddu 
Suggested-by: Peter Maydell 
---
 hw/cpu/a9mpcore.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/hw/cpu/a9mpcore.c b/hw/cpu/a9mpcore.c
index 1f8bc8a..b4f6a7e 100644
--- a/hw/cpu/a9mpcore.c
+++ b/hw/cpu/a9mpcore.c
@@ -16,6 +16,8 @@
 #include "hw/qdev-properties.h"
 #include "hw/core/cpu.h"
 
+#define A9_GIC_NUM_PRIORITY_BITS5
+
 static void a9mp_priv_set_irq(void *opaque, int irq, int level)
 {
 A9MPPrivState *s = (A9MPPrivState *)opaque;
@@ -68,6 +70,8 @@ static void a9mp_priv_realize(DeviceState *dev, Error **errp)
 gicdev = DEVICE(>gic);
 qdev_prop_set_uint32(gicdev, "num-cpu", s->num_cpu);
 qdev_prop_set_uint32(gicdev, "num-irq", s->num_irq);
+qdev_prop_set_uint32(gicdev, "num-priority-bits",
+ A9_GIC_NUM_PRIORITY_BITS);
 
 /* Make the GIC's TZ support match the CPUs. We assume that
  * either all the CPUs have TZ, or none do.
-- 
2.7.4




Re: [PATCH v7 01/11] qapi/error: add (Error **errp) cleaning APIs

2020-02-20 Thread Markus Armbruster
Vladimir Sementsov-Ogievskiy  writes:

> Add functions to clean Error **errp: call corresponding Error *err
> cleaning function and set pointer to NULL.
>
> New functions:
>   error_free_errp
>   error_report_errp
>   warn_report_errp
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> Reviewed-by: Greg Kurz 
> Reviewed-by: Eric Blake 
> ---
>
> CC: Eric Blake 
> CC: Kevin Wolf 
> CC: Max Reitz 
> CC: Greg Kurz 
> CC: Stefano Stabellini 
> CC: Anthony Perard 
> CC: Paul Durrant 
> CC: Stefan Hajnoczi 
> CC: "Philippe Mathieu-Daudé" 
> CC: Laszlo Ersek 
> CC: Gerd Hoffmann 
> CC: Stefan Berger 
> CC: Markus Armbruster 
> CC: Michael Roth 
> CC: qemu-bl...@nongnu.org
> CC: xen-de...@lists.xenproject.org
>
>  include/qapi/error.h | 26 ++
>  1 file changed, 26 insertions(+)
>
> diff --git a/include/qapi/error.h b/include/qapi/error.h
> index ad5b6e896d..d34987148d 100644
> --- a/include/qapi/error.h
> +++ b/include/qapi/error.h
> @@ -309,6 +309,32 @@ void warn_reportf_err(Error *err, const char *fmt, ...)
>  void error_reportf_err(Error *err, const char *fmt, ...)
>  GCC_FMT_ATTR(2, 3);
>  
> +/*
> + * Functions to clean Error **errp: call corresponding Error *err cleaning
> + * function, then set pointer to NULL.
> + */
> +static inline void error_free_errp(Error **errp)
> +{
> +assert(errp && *errp);
> +error_free(*errp);
> +*errp = NULL;
> +}
> +
> +static inline void error_report_errp(Error **errp)
> +{
> +assert(errp && *errp);
> +error_report_err(*errp);
> +*errp = NULL;
> +}
> +
> +static inline void warn_report_errp(Error **errp)
> +{
> +assert(errp && *errp);
> +warn_report_err(*errp);
> +*errp = NULL;
> +}
> +
> +
>  /*
>   * Just like error_setg(), except you get to specify the error class.
>   * Note: use of error classes other than ERROR_CLASS_GENERIC_ERROR is

These appear to be unused apart from the Coccinelle script in PATCH 03.

They are used in the full "[RFC v5 000/126] error: auto propagated
local_err" series.  Options:

1. Pick a few more patches into this part I series, so these guys come
   with users.

2. Punt this patch to the first part that has users, along with the
   part of the Coccinelle script that deals with them.

3. Do nothing: accept the functions without users.

I habitually dislike 3., but reviewing the rest of this series might
make me override that dislike.




[Bug 1857811] Re: qemu user static binary seems to lack support for network namespace.

2020-02-20 Thread crocket
I just called _configure_loopback_interface in a qemu-aarch64 chroot,
and the error is not reproducible with qemu-4.2.0. Has it been fixed?

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1857811

Title:
  qemu user static binary seems to lack support for network namespace.

Status in QEMU:
  New

Bug description:
  Whenever I execute emerge in gentoo linux in qemu-aarch64 chroot, I
  see the following error message.

  Unable to configure loopback interface: Operation not supported

  If I disable emerge's network-sandbox which utilizes network
  namespace, the error disappears.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1857811/+subscriptions



[PATCH] virtiofsd: Remove fuse.h and struct fuse_module

2020-02-20 Thread Xiao Yang
None of the code in fuse.h, nor struct fuse_module, is used by virtiofsd,
so removing them is safe.

Signed-off-by: Xiao Yang 
---
 tools/virtiofsd/fuse.h   | 1229 --
 tools/virtiofsd/fuse_i.h |   16 -
 2 files changed, 1245 deletions(-)
 delete mode 100644 tools/virtiofsd/fuse.h

diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h
deleted file mode 100644
index aba13fef2d..00
--- a/tools/virtiofsd/fuse.h
+++ /dev/null
@@ -1,1229 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007  Miklos Szeredi 
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB.
- */
-
-#ifndef FUSE_H_
-#define FUSE_H_
-
-/*
- *
- * This file defines the library interface of FUSE
- *
- * IMPORTANT: you should define FUSE_USE_VERSION before including this header.
- */
-
-#include "fuse_common.h"
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-/*
- * Basic FUSE API
- */
-
-/** Handle for a FUSE filesystem */
-struct fuse;
-
-/**
- * Readdir flags, passed to ->readdir()
- */
-enum fuse_readdir_flags {
-/**
- * "Plus" mode.
- *
- * The kernel wants to prefill the inode cache during readdir.  The
- * filesystem may honour this by filling in the attributes and setting
- * FUSE_FILL_DIR_FLAGS for the filler function.  The filesystem may also
- * just ignore this flag completely.
- */
-FUSE_READDIR_PLUS = (1 << 0),
-};
-
-enum fuse_fill_dir_flags {
-/**
- * "Plus" mode: all file attributes are valid
- *
- * The attributes are used by the kernel to prefill the inode cache
- * during a readdir.
- *
- * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set
- * and vice versa.
- */
-FUSE_FILL_DIR_PLUS = (1 << 1),
-};
-
-/**
- * Function to add an entry in a readdir() operation
- *
- * The *off* parameter can be any non-zero value that enables the
- * filesystem to identify the current point in the directory
- * stream. It does not need to be the actual physical position. A
- * value of zero is reserved to indicate that seeking in directories
- * is not supported.
- *
- * @param buf the buffer passed to the readdir() operation
- * @param name the file name of the directory entry
- * @param stat file attributes, can be NULL
- * @param off offset of the next entry or zero
- * @param flags fill flags
- * @return 1 if buffer is full, zero otherwise
- */
-typedef int (*fuse_fill_dir_t)(void *buf, const char *name,
-   const struct stat *stbuf, off_t off,
-   enum fuse_fill_dir_flags flags);
-/**
- * Configuration of the high-level API
- *
- * This structure is initialized from the arguments passed to
- * fuse_new(), and then passed to the file system's init() handler
- * which should ensure that the configuration is compatible with the
- * file system implementation.
- */
-struct fuse_config {
-/**
- * If `set_gid` is non-zero, the st_gid attribute of each file
- * is overwritten with the value of `gid`.
- */
-int set_gid;
-unsigned int gid;
-
-/**
- * If `set_uid` is non-zero, the st_uid attribute of each file
- * is overwritten with the value of `uid`.
- */
-int set_uid;
-unsigned int uid;
-
-/**
- * If `set_mode` is non-zero, the any permissions bits set in
- * `umask` are unset in the st_mode attribute of each file.
- */
-int set_mode;
-unsigned int umask;
-
-/**
- * The timeout in seconds for which name lookups will be
- * cached.
- */
-double entry_timeout;
-
-/**
- * The timeout in seconds for which a negative lookup will be
- * cached. This means, that if file did not exist (lookup
- * retuned ENOENT), the lookup will only be redone after the
- * timeout, and the file/directory will be assumed to not
- * exist until then. A value of zero means that negative
- * lookups are not cached.
- */
-double negative_timeout;
-
-/**
- * The timeout in seconds for which file/directory attributes
- * (as returned by e.g. the `getattr` handler) are cached.
- */
-double attr_timeout;
-
-/**
- * Allow requests to be interrupted
- */
-int intr;
-
-/**
- * Specify which signal number to send to the filesystem when
- * a request is interrupted.  The default is hardcoded to
- * USR1.
- */
-int intr_signal;
-
-/**
- * Normally, FUSE assigns inodes to paths only for as long as
- * the kernel is aware of them. With this option inodes are
- * instead remembered for at least this many seconds.  This
- * will require more memory, but may be necessary when using
- * applications that make use of inode numbers.
- *
- * A number of -1 means that inodes will be remembered for the
- * entire life-time of the file-system process.
- */
-int remember;
-
-/**
-  

[PATCH] hw/ide: Remove status register read side effect

2020-02-20 Thread jasper.lowell
The Linux libATA API documentation mentions that on some hardware,
reading the status register has the side effect of clearing the
interrupt condition. When emulating the generic Sun4u machine running
Solaris 10, the Solaris 10 CMD646 driver exits fatally because of this
emulated side effect. This side effect is likely to not exist on real
CMD646 hardware.

Signed-off-by: Jasper Lowell 
---
 hw/ide/core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 8eb766..82fd0632ac 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -2210,7 +2210,6 @@ uint32_t ide_ioport_read(void *opaque, uint32_t addr)
 } else {
 ret = s->status;
 }
-qemu_irq_lower(bus->irq);
 break;
 }
 
-- 
2.24.1




RE: RFC: Split EPT huge pages in advance of dirty logging

2020-02-20 Thread Zhoujian (jay)


> -Original Message-
> From: Peter Xu [mailto:pet...@redhat.com]
> Sent: Friday, February 21, 2020 2:17 AM
> To: Ben Gardon 
> Cc: Zhoujian (jay) ; Junaid Shahid
> ; k...@vger.kernel.org; qemu-devel@nongnu.org;
> pbonz...@redhat.com; dgilb...@redhat.com; quint...@redhat.com;
> Liujinsong (Paul) ; linfeng (M)
> ; wangxin (U) ;
> Huangweidong (C) 
> Subject: Re: RFC: Split EPT huge pages in advance of dirty logging
> 
> On Thu, Feb 20, 2020 at 09:34:52AM -0800, Ben Gardon wrote:
> > On Thu, Feb 20, 2020 at 5:53 AM Zhoujian (jay) 
> wrote:
> > >
> > >
> > >
> > > > -Original Message-
> > > > From: Peter Xu [mailto:pet...@redhat.com]
> > > > Sent: Thursday, February 20, 2020 1:19 AM
> > > > To: Zhoujian (jay) 
> > > > Cc: k...@vger.kernel.org; qemu-devel@nongnu.org;
> > > > pbonz...@redhat.com; dgilb...@redhat.com; quint...@redhat.com;
> > > > Liujinsong (Paul) ; linfeng (M)
> > > > ; wangxin (U)
> ;
> > > > Huangweidong (C) 
> > > > Subject: Re: RFC: Split EPT huge pages in advance of dirty logging
> > > >
> > > > On Wed, Feb 19, 2020 at 01:19:08PM +, Zhoujian (jay) wrote:
> > > > > Hi Peter,
> > > > >
> > > > > > -Original Message-
> > > > > > From: Peter Xu [mailto:pet...@redhat.com]
> > > > > > Sent: Wednesday, February 19, 2020 1:43 AM
> > > > > > To: Zhoujian (jay) 
> > > > > > Cc: k...@vger.kernel.org; qemu-devel@nongnu.org;
> > > > pbonz...@redhat.com;
> > > > > > dgilb...@redhat.com; quint...@redhat.com; Liujinsong (Paul)
> > > > > > ; linfeng (M) ;
> > > > > > wangxin (U) ; Huangweidong (C)
> > > > > > 
> > > > > > Subject: Re: RFC: Split EPT huge pages in advance of dirty
> > > > > > logging
> > > > > >
> > > > > > On Tue, Feb 18, 2020 at 01:13:47PM +, Zhoujian (jay) wrote:
> > > > > > > Hi all,
> > > > > > >
> > > > > > > We found that the guest will be soft-lockup occasionally
> > > > > > > when live migrating a 60 vCPU, 512GiB huge page and memory
> > > > > > > sensitive VM. The reason is clear, almost all of the vCPUs
> > > > > > > are waiting for the KVM MMU spin-lock to create 4K SPTEs
> > > > > > > when the huge pages are write protected. This
> > > > > > phenomenon is also described in this patch set:
> > > > > > > https://patchwork.kernel.org/cover/11163459/
> > > > > > > which aims to handle page faults in parallel more efficiently.
> > > > > > >
> > > > > > > Our idea is to use the migration thread to touch all of the
> > > > > > > guest memory in the granularity of 4K before enabling dirty
> > > > > > > logging. To be more specific, we split all the PDPE_LEVEL
> > > > > > > SPTEs into DIRECTORY_LEVEL SPTEs as the first step, and then
> > > > > > > split all the DIRECTORY_LEVEL SPTEs into
> > > > > > PAGE_TABLE_LEVEL SPTEs as the following step.
> > > > > >
> > > > > > IIUC, QEMU will prefer to use huge pages for all the anonymous
> > > > > > ramblocks (please refer to ram_block_add):
> > > > > >
> > > > > > qemu_madvise(new_block->host, new_block->max_length,
> > > > > > QEMU_MADV_HUGEPAGE);
> > > > >
> > > > > Yes, you're right
> > > > >
> > > > > >
> > > > > > Another alternative I can think of is to add an extra
> > > > > > parameter to QEMU to explicitly disable huge pages (so that
> > > > > > can even be MADV_NOHUGEPAGE instead of MADV_HUGEPAGE).
> > > > > > However that
> > > > should also
> > > > > > drag down the performance for the whole lifecycle of the VM.
> > > > >
> > > > > From the performance point of view, it is better to keep the
> > > > > huge pages when the VM is not in the live migration state.
> > > > >
> > > > > > A 3rd option is to make a QMP
> > > > > > command to dynamically turn huge pages on/off for ramblocks
> globally.
> > > > >
> > > > > We're searching a dynamic method too.
> > > > > We plan to add two new flags for each memory slot, say
> > > > > KVM_MEM_FORCE_PT_DIRECTORY_PAGES and
> > > > > KVM_MEM_FORCE_PT_PAGE_TABLE_PAGES. These flags can be set
> > > > > through KVM_SET_USER_MEMORY_REGION ioctl.
> 
> [1]
> 
> > > > >
> > > > > The mapping_level which is called by tdp_page_fault in the
> > > > > kernel side will return PT_DIRECTORY_LEVEL if the
> > > > KVM_MEM_FORCE_PT_DIRECTORY_PAGES
> > > > > flag of the memory slot is set, and return PT_PAGE_TABLE_LEVEL
> > > > > if the KVM_MEM_FORCE_PT_PAGE_TABLE_PAGES flag is set.
> > > > >
> > > > > The key steps to split the huge pages in advance of enabling
> > > > > dirty log is as follows:
> > > > > 1. The migration thread in user space uses
> > > > KVM_SET_USER_MEMORY_REGION
> > > > > ioctl to set the KVM_MEM_FORCE_PT_DIRECTORY_PAGES flag for each
> > > > memory
> > > > > slot.
> > > > > 2. The migration thread continues to use the
> > > > > KVM_SPLIT_HUGE_PAGES ioctl (which is newly added) to do the
> > > > > splitting of large pages in the kernel side.
> > > > > 3. A new vCPU is created temporarily (do some initialization but
> > > > > will not
> > > > > run) to help to do the work, i.e. as the parameter of the 
> > > > > tdp_page_fault.
> > > > > 4. Collect the GPA 

[RFC v3 2/3] acpi:pci-expender-bus: Add pxb support for arm

2020-02-20 Thread Yubo Miao
From: miaoyubo 

Currently pxb-pcie is not supported by the virt machine,
and only one main host bridge is described in the ACPI tables.
With this patch, PXB-PCIE is supported on arm and certain
resources are allocated for each pxb-pcie in the ACPI table.
The resources for the main host bridge are also reallocated.

Signed-off-by: miaoyubo 
---
 hw/arm/virt-acpi-build.c | 125 +++
 hw/pci-host/gpex.c   |   4 ++
 include/hw/arm/virt.h|   7 +++
 3 files changed, 125 insertions(+), 11 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 0540234b8a..2c1e0d2aaa 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -49,6 +49,8 @@
 #include "kvm_arm.h"
 #include "migration/vmstate.h"
 
+#include "hw/arm/virt.h"
+#include "hw/pci/pci_bus.h"
 #define ARM_SPI_BASE 32
 
 static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus)
@@ -271,19 +273,117 @@ static void acpi_dsdt_add_pci_osc(Aml *dev, Aml *scope)
 }
 
 static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
-  uint32_t irq, bool use_highmem, bool 
highmem_ecam)
+  uint32_t irq, bool use_highmem, bool 
highmem_ecam,
+  VirtMachineState *vms)
 {
 int ecam_id = VIRT_ECAM_ID(highmem_ecam);
-Aml *method, *crs;
+Aml *method, *dev, *crs;
+int count = 0;
 hwaddr base_mmio = memmap[VIRT_PCIE_MMIO].base;
 hwaddr size_mmio = memmap[VIRT_PCIE_MMIO].size;
 hwaddr base_pio = memmap[VIRT_PCIE_PIO].base;
 hwaddr size_pio = memmap[VIRT_PCIE_PIO].size;
 hwaddr base_ecam = memmap[ecam_id].base;
 hwaddr size_ecam = memmap[ecam_id].size;
+/*
+ * 0x60 would be enough for pxb device
+ * if it is too small, there is no enough space
+ * for a pcie device plugged in a pcie-root port
+ */
+hwaddr size_addr = 0x60;
+hwaddr size_io = 0x4000;
 int nr_pcie_buses = size_ecam / PCIE_MMCFG_SIZE_MIN;
+int root_bus_limit = 0xFF;
+PCIBus *bus = NULL;
+bus = VIRT_MACHINE(vms)->bus;
+
+if (bus) {
+QLIST_FOREACH(bus, >child, sibling) {
+uint8_t bus_num = pci_bus_num(bus);
+uint8_t numa_node = pci_bus_numa_node(bus);
+
+if (!pci_bus_is_root(bus)) {
+continue;
+}
+if (bus_num < root_bus_limit) {
+root_bus_limit = bus_num - 1;
+}
+count++;
+dev = aml_device("PC%.02X", bus_num);
+aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A08")));
+aml_append(dev, aml_name_decl("_CID", aml_string("PNP0A03")));
+aml_append(dev, aml_name_decl("_ADR", aml_int(0)));
+aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
+aml_append(dev, aml_name_decl("_SEG", aml_int(0)));
+aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
+aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
+aml_append(dev, aml_name_decl("_STR", aml_unicode("pxb Device")));
+if (numa_node != NUMA_NODE_UNASSIGNED) {
+method = aml_method("_PXM", 0, AML_NOTSERIALIZED);
+aml_append(method, aml_return(aml_int(numa_node)));
+aml_append(dev, method);
+}
+
+acpi_dsdt_add_pci_route_table(dev, scope, nr_pcie_buses, irq);
+
+method = aml_method("_CBA", 0, AML_NOTSERIALIZED);
+aml_append(method, aml_return(aml_int(base_ecam)));
+aml_append(dev, method);
+
+method = aml_method("_CRS", 0, AML_NOTSERIALIZED);
+Aml *rbuf = aml_resource_template();
+aml_append(rbuf,
+   aml_word_bus_number(AML_MIN_FIXED, AML_MAX_FIXED,
+   AML_POS_DECODE, 0x,
+   bus_num, bus_num + 1, 0x,
+   2));
+aml_append(rbuf,
+   aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED,
+AML_MAX_FIXED, AML_NON_CACHEABLE,
+AML_READ_WRITE, 0x,
+base_mmio + size_mmio -
+size_addr * count,
+base_mmio + size_mmio - 1 -
+size_addr * (count - 1),
+0x, size_addr));
+aml_append(rbuf,
+   aml_dword_io(AML_MIN_FIXED, AML_MAX_FIXED,
+   AML_POS_DECODE, AML_ENTIRE_RANGE,
+   0x, size_pio - size_io * count,
+   size_pio - 1 - size_io * (count - 1),
+   base_pio, size_io));
+
+if (use_highmem) {
+hwaddr base_mmio_high = memmap[VIRT_HIGH_PCIE_MMIO].base;
+

[RFC v3 0/3] pci_expander_brdige:acpi:Support pxb-pcie for ARM

2020-02-20 Thread Yubo Miao
From: miaoyubo 

Currently pxb-pcie is not supported on arm. The reason is that
pxb-pcie is not described in the DSDT table and only one main host bridge
is described in the ACPI tables, which means it is not possible to
present different I/O NUMA nodes for different devices, especially
host-passthrough devices.

This series of patches make arm to support PXB-PCIE.

Users can configure pxb-pcie with a specific NUMA node. An example command
is:

   -device pxb-pcie,id=pci.7,bus_nr=128,numa_node=0,bus=pcie.0,addr=0x9

Since the bus of pxb-pcie is a root bus, devices cannot be plugged
into pxb-pcie directly; a pcie-root-port or pci-bridge should be defined
and plugged into pxb-pcie, and then the device can be plugged into the
pcie-root-port or pci-bridge.

With these patches, I/O NUMA affinity can be presented to the guest by
defining a pxb-pcie with the NUMA node and plugging the device into it.

miaoyubo (3):
  acpi:Extract two APIs from acpi_dsdt_add_pci
  acpi:pci-expender-bus: Add pxb support for arm
  ACPI/unit-test: Add a new test for pxb-pcie for arm

 hw/arm/virt-acpi-build.c   | 240 -
 hw/pci-host/gpex.c |   4 +
 include/hw/arm/virt.h  |   7 +
 tests/data/acpi/virt/DSDT.pxb  | Bin 0 -> 34209 bytes
 tests/qtest/bios-tables-test.c |  54 +++-
 5 files changed, 238 insertions(+), 67 deletions(-)
 create mode 100644 tests/data/acpi/virt/DSDT.pxb

-- 
2.19.1





[RFC v3 3/3] ACPI/unit-test: Add a new test for pxb-pcie for arm

2020-02-20 Thread Yubo Miao
From: miaoyubo 

Currently, pxb-pcie could be defined by the cmdline like
--device pxb-pcie,id=pci.9,bus_nr=128
However pxb-pcie is not described in acpi tables for arm.

The former two patches support pxb-pcie for arm, especially the
specification of pxb-pcie in the DSDT table.

Add a testcase to make sure the ACPI table is correct for guest.

Signed-off-by: miaoyubo 
---
 tests/data/acpi/virt/DSDT.pxb  | Bin 0 -> 34209 bytes
 tests/qtest/bios-tables-test.c |  54 +
 2 files changed, 48 insertions(+), 6 deletions(-)
 create mode 100644 tests/data/acpi/virt/DSDT.pxb

diff --git a/tests/data/acpi/virt/DSDT.pxb b/tests/data/acpi/virt/DSDT.pxb
new file mode 100644
index 
..4eea3192c75ff28f7054d626a9363ca025b6c0ad
GIT binary patch
literal 34209
zcmeI*cXU+szJ~D)1PGxe5PG+us9-{YGz}UAMT!L#ks?x*Dx!d5hoIPd
z?}}o>iWL;GW5HgrlKbvVM??^)~qbMIProvd|8p2_U*%qO!m?AgcPkRQ(HLbszg^Hd*
zYT59@{GfDxK}u{$QSzH5MFX?4va_qcnOYVriD$G-YqqdX5KgQUqzA#0T0ymH9aJ-P
zt=#;Qdf_)p=V$jH6t9{xXmH68P3ev)8EFlwrs(=X$_(9dxJh>6UU8FZi5vcVla%Bp
zz50)g^-pXvw4i9XAYFAU@nN}Xb+t___n%uw)`8L
z7F4goX88!*;pB+$X8_2BOj*;OO*!h6xx+mI)uU#l*o>||BPVi3ji?#5Y(|dH
z=oUF6C2B^h<}5a88su#W_0%%JtAk+ikeZ+X7unGJtJq-j+)WHX7uzKy&`9%
zM$JgK8NGZ)@5mW-qGmL*8NF>rHhzu%+CS2wW@OrogM3EhZpvShs#sHrYy`MIZnvrEQ2KtQX{j^2YjIK7L
z&}T!Avqbh8+KeGpGv;lrQ5@@4bw`Q^7iZ;W_0oY$Ob29%%1Indg_l(x
zNW=>U=@nIzyEz+7!38n{QgZ4pwa9l7gm>7)H=Rj93E*aK%`Pv$F
zgOo;fQ|eY!1OwuQSwZ6KYWg}mXjCt}JgvH-Ycgsma)L(nQ|eVz=ol|?y{Zbg(iIyd
zzE`=OuA0nnOU%j8vc92DN7u3EXJGTc)4rt-QqDz{ADgBn4%8;u
z5?ku3m0K#qEp?A7x70yl;<{B;Wvslaj+?5K$#iE;IBQnkX3|+Rv1RhkE8STOP8|vB
z9we`-<1BY7-B}yX+HlsUQ|ZoBI8!UPO#Vo8T;@)tJL|xyWB9~RukuGyhfbwC)8I^l
zQ^#@cRJyY+oOR)>OQ+JEdLdztXFWJ|+~-cEJL|()AI|!8D&5%t%Bx
zA)F2ARJv2Iq0E^Mr;aP#sdQ%soEdOt(5ZB1BRCtuspC+0D&5%_<*yrc>$8OgJ;)
z)N!jjmF{c;XA?M^(5ZB1Q#hN#spDLCD&5%(_Zly~4D|vpJkPE_SEVoh{
z0cQ(3mF{c_XG=JB9PLh}J6plo3eHw^D&5%{}1xZ9mdcea7E4V-Q0RJyY*oNeLM
zak@K|?raBVJ2>0XsdQ(1INQUi<9c^0-Kn!8+7pt0vvq*81D#5D?g8f>aO!n{JC*L-
z6V5&1+>=hFJNJTfFF5tO!JSHX?hWVOaPCc~(w!aQ>*#*unaO!oEJC*J{5Y7YPJdjSMJ9RqFo^P|@)axpDD&48GaQ6F|4QDo;
zN_Xnqo3>0Id3J?UufyD_bf?a^Y0KpCZ4R6{bSmAc^K9nq2B%)Pxl`#*on13$cR0J#
zsdT5#shP6}oO+$-PNh3_Ce55Z;p|DL(w#bgX3k!4>UE(zmG0D8GjsNavp1becj{c3
zIS+zUuOr>5bf?acne$*c52jP;PMsGs=OJ+Fb*DR(?$p^ZbM}F=51mSP>KvFk`@*T$
zsqR#|Q)j-+c_^HR(y4T3KRElr*^f@8J9U=JoVjr3(y4T(}956(O~mG0CTEpx`;
zjM1rdr_N((%j7HId^q#zRJv1VugqBhX91l`cj}y#Is3!epH8Jab*9Rk1K=D$r_!A|
zKV{B=a1NwX=}w)MGG`&2g>)+2sdG{0JPgjm=v2B>XQ0e^IGl#JDGD3oP+38
zx>IMH%vl6y5uHkR>Kv0f2g5m-PNh3_X33mG;2c7y(w)U{7QIFE$$NII47EQPZa+?i>y0XgEjHsdVR2
za2^HcQFJQZc{H3y!+A8FN_UpQSq5hrol19(fpZL;W9U@6^B6dff%6zTmF^r1=U6z$
z(y4UkI5@|_IgU=HJCB9)SU8WRQ|V5f@3!aLeO=LvKw-FYIM
zC}1m{U`oCRK(JQdDU
z=~TM#^E5h@?wkteR5+*7sdVS*aGnn5>2xaHIStNfa89FB>CQ9YJOj=%=v2CM
zI-JwtoKC0GoipH^0p|=lmF}Dg=S(
zHk@a}c{ZI)_1IdGmsr_!AZ;9LOb0y>rMTnOhvI2Y2Xbmt;C7s0uRPNh2+!?_sF
z#dIp!>CUBaE`@U`ol19}3+K6To=d0Foy*`{2In$5mF_$b|rk4~jK
zm&3Un}-FZHo=finEol19J0Otj8UO=bPoh#s60p|)jmF`>#=SnzN(y4UkDmYic
zxr$DuJ1>OuLO3s^Q|Zo&;JgUVi|ACkb2Xf+;ap9p(w!H>c`=+9)2VdlC2(E>=OuJ1
z-FYdTm%@1|ol19J2IpmPUPhc%sdVQRa9#oD6?7`yc_o}z!g(c~N_Vb-
za}Atp=v2CMEu3rNTuZ0Yo#k+r!$J(w*zzTnFbmI+gCc3eKzGyoyexJFkZGYB;Z^
zQ|Zq2aIS}QJ)KH-UIXVfa9%^F(w*1Bc`cmR(y4Ukb#Pt>=XG={-FZEn*TZ=|ol1A!
z0Ot*G-ax0)oj1aHBb+zVsdVQ}aNY#xO>`>Vc{7|h!+A5EN_XA@=PhvFLZ{N5x59ZV
zoVU`cbmwhw-UjDwbSm9>JDj(}c{`m-cisW#9dO=3r_!Bw!g(j0chaeJ=Us5#1?OFK
zD&2WEoOi={H=Rm%-UH`7aNa|w(w+Chc`uyz(y4UkeQ@3f=Y4c4-FZKp_rrNVol18;
z0Otd6K0v3^oe#qKAe;}cRmg0({Mgbr_!Cz!1)ZE&(NuK=d*A=3+JCSiHdaDGjv
z(w*PH`3;=k(5ZCiw{U(7=eKk!-T57y-@*AEol19p59jxAeov>;oj<_&1Drq5sdVR$
zaQ+DAk8~>C`4gN!!TA%NN_YMY=g)BdOsCSFyWrdf=Po*x?)(MLU*PQ=sg$DTpr{X4?<_uyrj
zwaVfp@oH70s+PTmYgGTcQF}#`>Z77>&@Tve8f!u;!FOyyTI
zg7yDrx11UuUR)fc#)g#)3NriGDH{@xr7WEs#PUaGmk+GHYC=ZiwDi)VZ8^KzweGii
zoT=cXO;?lX&4G}~F!
zVq>hRyw0jg`T^FcRxSKP@A*=6p7rr~*5~7?^v6@r#F~>_W7PnUX9GT-N`E{X;_+;V$Fm_HPo+Pe>3BTT
z@pz^uwoJY;fYO~AaAv@nL8sE4jo@qqXCpe5?raQaV>lbrsdQ&1oSATD(y4T36F8f|
z*@RA|JDbAU6wan}D&48SXKjzt->u<6P1Tz~G^11L_Vz^vjvVP@1k5|5z
z$zE{oMW@o8IzHBx$u;MD!?`z|N_XmTSX(BaZ#%--kxr#M_knXCIQOAb>CS!O+!xM$
z=~TM26P%sk>_n&1o%_MLADsKqsdT4~-R<$*AI|;hRJ!v3I1hmH06LZK>CS98v*FC9Q|ZpGaCU{WE1gPr=D?W)
zXAYf8cXorb8=T$fRJyY}oZaEcvCXAe4+?(7L?PdIzhsdQ_i%z9G
zd?-FXn42f=v|ol18e4Cldc9!#gworl1A2%Lw|sdQ%_IQziahfbwCb+*8s
zZ~Ma8mrkWS4~6qkI1i;$>CS#|_Jgw@ol1A+!kG(aE}cqu=E0cCSvO^Wn^=Q|ZnEI1At`pi}A2{&4n(vp=0mcMgDa0GtEpRJwB@oCDz;NT<@Bg>V+a
zSxBeSorl4B7@UXEsdVSza2^il;dCn9IS9@{a1NqV>CPfJi{LDxQ|ZpZa1MrZFr7+w
z4uNwBoI~hTy0aM0VmOQGRJwC0oI~LpN~hADC2*F`CWx^ozu!{8i7
zr_!Cn;T#UCQ4Z%it`d
zQ|V5fme=3e$@6_<;2cAz(w)b^c?_J#(5ZCiSUAVRIhIbPJIBE}4$g6OD&2W3oX5g>
zES*Ys9tY=fa2`je(w*bs91rJsI+gA`9?s+8Jf2RaJ14+70nQ0@D&08|*Lk6
zNpMbra}u3OcTR?LGMtm?RJ!v7I8T7{1Ui-OJQ2x^oJgQ{bFJr_!CLz3g@YGD&2V+oTtHg8l6gaPK9$SoKxvk
zy7P26Plxk#I+gC62In+5r_rf&=NWLG0p}TXD&099<98F0>ka|WGCcg}=!
zCY&?rRJwB(oU`DZMW@o8XTo_VoM+Ohbmwe1XTv$0PNh4~g7Yjm&!SW5*<-fpZR>
zN_WnMb1s~7=~TLN9-Q;woJXh9o%7+G59fS3mF_$n>e>n@*)W=wCIM1O|>COdk
zE`W0Zol18ugmWRB3+Yt4a}k`2;9Nwf(w)8I+gBR0_PGqm(Zzn=TbPA!nu@A

[RFC v3 1/3] acpi:Extract two APIs from acpi_dsdt_add_pci

2020-02-20 Thread Yubo Miao
From: miaoyubo 

Extract two APIs, acpi_dsdt_add_pci_route_table and
acpi_dsdt_add_pci_osc, from acpi_dsdt_add_pci. The first
API is used to specify the PCI routing table and the second
API is used to declare the operating system capabilities.
These two APIs will be used to describe the pxb-pcie in the DSDT.

Signed-off-by: miaoyubo 
---
 hw/arm/virt-acpi-build.c | 135 ++-
 1 file changed, 75 insertions(+), 60 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index bd5f771e9b..0540234b8a 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -151,30 +151,12 @@ static void acpi_dsdt_add_virtio(Aml *scope,
 }
 }
 
-static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
-  uint32_t irq, bool use_highmem, bool 
highmem_ecam)
+static void acpi_dsdt_add_pci_route_table(Aml *dev, Aml *scope,
+  int nr_pcie_buses,
+  uint32_t irq)
 {
-int ecam_id = VIRT_ECAM_ID(highmem_ecam);
-Aml *method, *crs, *ifctx, *UUID, *ifctx1, *elsectx, *buf;
 int i, bus_no;
-hwaddr base_mmio = memmap[VIRT_PCIE_MMIO].base;
-hwaddr size_mmio = memmap[VIRT_PCIE_MMIO].size;
-hwaddr base_pio = memmap[VIRT_PCIE_PIO].base;
-hwaddr size_pio = memmap[VIRT_PCIE_PIO].size;
-hwaddr base_ecam = memmap[ecam_id].base;
-hwaddr size_ecam = memmap[ecam_id].size;
-int nr_pcie_buses = size_ecam / PCIE_MMCFG_SIZE_MIN;
-
-Aml *dev = aml_device("%s", "PCI0");
-aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A08")));
-aml_append(dev, aml_name_decl("_CID", aml_string("PNP0A03")));
-aml_append(dev, aml_name_decl("_SEG", aml_int(0)));
-aml_append(dev, aml_name_decl("_BBN", aml_int(0)));
-aml_append(dev, aml_name_decl("_ADR", aml_int(0)));
-aml_append(dev, aml_name_decl("_UID", aml_string("PCI0")));
-aml_append(dev, aml_name_decl("_STR", aml_unicode("PCIe 0 Device")));
-aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
-
+Aml *method, *crs;
 /* Declare the PCI Routing Table. */
 Aml *rt_pkg = aml_varpackage(nr_pcie_buses * PCI_NUM_PINS);
 for (bus_no = 0; bus_no < nr_pcie_buses; bus_no++) {
@@ -190,7 +172,6 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry 
*memmap,
 }
 aml_append(dev, aml_name_decl("_PRT", rt_pkg));
 
-/* Create GSI link device */
 for (i = 0; i < PCI_NUM_PINS; i++) {
 uint32_t irqs =  irq + i;
 Aml *dev_gsi = aml_device("GSI%d", i);
@@ -210,42 +191,11 @@ static void acpi_dsdt_add_pci(Aml *scope, const 
MemMapEntry *memmap,
 aml_append(dev_gsi, method);
 aml_append(dev, dev_gsi);
 }
+}
 
-method = aml_method("_CBA", 0, AML_NOTSERIALIZED);
-aml_append(method, aml_return(aml_int(base_ecam)));
-aml_append(dev, method);
-
-method = aml_method("_CRS", 0, AML_NOTSERIALIZED);
-Aml *rbuf = aml_resource_template();
-aml_append(rbuf,
-aml_word_bus_number(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE,
-0x, 0x, nr_pcie_buses - 1, 0x,
-nr_pcie_buses));
-aml_append(rbuf,
-aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED,
- AML_NON_CACHEABLE, AML_READ_WRITE, 0x, base_mmio,
- base_mmio + size_mmio - 1, 0x, size_mmio));
-aml_append(rbuf,
-aml_dword_io(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE,
- AML_ENTIRE_RANGE, 0x, 0x, size_pio - 1, base_pio,
- size_pio));
-
-if (use_highmem) {
-hwaddr base_mmio_high = memmap[VIRT_HIGH_PCIE_MMIO].base;
-hwaddr size_mmio_high = memmap[VIRT_HIGH_PCIE_MMIO].size;
-
-aml_append(rbuf,
-aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED,
- AML_NON_CACHEABLE, AML_READ_WRITE, 0x,
- base_mmio_high,
- base_mmio_high + size_mmio_high - 1, 0x,
- size_mmio_high));
-}
-
-aml_append(method, aml_name_decl("RBUF", rbuf));
-aml_append(method, aml_return(rbuf));
-aml_append(dev, method);
-
+static void acpi_dsdt_add_pci_osc(Aml *dev, Aml *scope)
+{
+Aml *method, *UUID, *ifctx, *ifctx1, *elsectx, *buf;
 /* Declare an _OSC (OS Control Handoff) method */
 aml_append(dev, aml_name_decl("SUPP", aml_int(0)));
 aml_append(dev, aml_name_decl("CTRL", aml_int(0)));
@@ -253,7 +203,8 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry 
*memmap,
 aml_append(method,
 aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1"));
 
-/* PCI Firmware Specification 3.0
+/*
+ * PCI Firmware Specification 3.0
  * 4.5.1. _OSC Interface for PCI Host Bridge Devices
  * The _OSC interface for a PCI/PCI-X/PCI Express hierarchy is
  * 

Re: [PATCH] console: make QMP screendump use coroutine

2020-02-20 Thread Markus Armbruster
"Dr. David Alan Gilbert"  writes:

> * Markus Armbruster (arm...@redhat.com) wrote:
[...]
>> Collecting several users before building infrastructure makes sense when
>> the design of the infrastructure isn't obvious, or when the need for it
>> is in doubt.
>> 
>> Neither is the case for running QMP handlers in a coroutine: QMP
>> commands blocking the main loop is without doubt a problem we need to
>> solve, and the way to solve it was obvious enough for Kevin to do it
>> with one user: block_resize.  A second one quickly followed: screendump.
>> 
>> The only part that's different for HMP, I think, is "need".
>> 
>> Is HMP blocking the main loop a problem?
>> 
>> If yes, is it serious enough to justify solving it?
>
> I don't mind if HMP blocks for a small time while doing something, but
> not if it can hang if the guest (or something else like it) misbehaves.
> Not if it's something you might need to issue another command to recover
> from.

The issue isn't HMP being unavailable while a command executes.  The
issue is HMP stopping the main loop while a command executes.

Stopping the main loop not only stops everything running there, it can
also stop other threads when they synchronize with the main loop via the
Big QEMU Lock.

The obvious example is a command accessing a remote filesystem.  Special
case: NFS with the hard option can hang indefinitely.

screendump does that, and also waits for asynchronous gfx_update() with
qxl devices.  Networking again, with a different peer.

We already decided that QMP commands stopping the main loop is serious.

To say it's not serious for HMP amounts to "don't do that then, use
QMP".  Which may be fair.  Not for me to decide, though.




Re: [PATCH] hw/char/pl011: Enable TxFIFO and async transmission

2020-02-20 Thread no-reply
Patchew URL: https://patchew.org/QEMU/20200221044908.266883-1-gs...@redhat.com/



Hi,

This series failed the docker-quick@centos7 build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#!/bin/bash
make docker-image-centos7 V=1 NETWORK=1
time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1
=== TEST SCRIPT END ===

  TESTiotest-qcow2: 283
Passed all 116 iotests
**
ERROR:/tmp/qemu-test/src/tests/qtest/acpi-utils.c:145:acpi_find_rsdp_address_uefi:
 code should not be reached
ERROR - Bail out! 
ERROR:/tmp/qemu-test/src/tests/qtest/acpi-utils.c:145:acpi_find_rsdp_address_uefi:
 code should not be reached
make: *** [check-qtest-aarch64] Error 1
Traceback (most recent call last):
  File "./tests/docker/docker.py", line 664, in 
sys.exit(main())
---
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', 
'--label', 'com.qemu.instance.uuid=fa7130c63a4e4192bda02ab29bd45081', '-u', 
'1003', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', 
'-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 
'SHOW_ENV=1', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', 
'/home/patchew2/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', 
'/var/tmp/patchew-tester-tmp-sjyvkaqh/src/docker-src.2020-02-21-01.12.04.15392:/var/tmp/qemu:z,ro',
 'qemu:centos7', '/var/tmp/qemu/run', 'test-quick']' returned non-zero exit 
status 2.
filter=--filter=label=com.qemu.instance.uuid=fa7130c63a4e4192bda02ab29bd45081
make[1]: *** [docker-run] Error 1
make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-sjyvkaqh/src'
make: *** [docker-run-test-quick@centos7] Error 2

real16m9.616s
user0m8.253s


The full log is available at
http://patchew.org/logs/20200221044908.266883-1-gs...@redhat.com/testing.docker-quick@centos7/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH 1/2] riscv: roms: Add 32-bit OpenSBI firmware image for sifive_u

2020-02-20 Thread Anup Patel
On Fri, Feb 21, 2020 at 8:08 AM Bin Meng  wrote:
>
> Hi Philippe,
>
> On Fri, Feb 21, 2020 at 1:31 AM Philippe Mathieu-Daudé
>  wrote:
> >
> > Hi Bin,
> >
> > On 2/20/20 3:42 PM, Bin Meng wrote:
> > > Although the real world SiFive HiFive Unleashed board is a 64-bit
> > > hardware configuration, with QEMU it is possible to test 32-bit
> > > configuration with the same hardware features.
> > >
> > > This updates the roms Makefile to add the build rules for creating
> > > the 32-bit OpenSBI firmware image for sifive_u machine. A pre-built
> > > OpenSBI image (built from commit 3e7d666) has been added as the
> > > default bios for 32-bit sifive_u machine.
> >
> > With QEMU:
> >
> > fatal: ambiguous argument '3e7d666': unknown revision or path not in the
> > working tree.
> >
> > This looks like an OpenSBI commit but QEMU only include up to v0.5.
> >
> > Can you build v0.5? Else can you update the submodule?
> >
>
> Will do in v2.

We plan to release OpenSBI v0.6 on monday (24th Feb 2020) so maybe
you can update all RISC-V ROM images based on OpenSBI v0.6 ??

>
> > Also, can you add a CI job to build this, so we have reproducible builds
> > (see QEMU commit 71920809ceabed as example)?
>
> I cannot find any documentation on how to test a CI job with GitLab CI. Does
> QEMU have a public CI runner for testing?
>
> Regards,
> Bin
>

Regards,
Anup



[PATCH] hw/char/pl011: Enable TxFIFO and async transmission

2020-02-20 Thread Gavin Shan
The depth of TxFIFO can be 1 or 16 depending on LCR[4]. The TxFIFO is
disabled when its depth is 1. It's nice to have TxFIFO enabled if
possible because more characters can be piled and transmitted at once,
which would have less overhead. Besides, we can be blocked because of
qemu_chr_fe_write_all(), which isn't nice.

This enables TxFIFO if possible. On the other hand, the asynchronous
transmission is enabled if needed, as we did in hw/char/cadence_uart.c

Signed-off-by: Gavin Shan 
---
 hw/char/pl011.c | 70 +
 include/hw/char/pl011.h |  2 ++
 2 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/hw/char/pl011.c b/hw/char/pl011.c
index 13e784f9d9..14ed04c8c8 100644
--- a/hw/char/pl011.c
+++ b/hw/char/pl011.c
@@ -169,6 +169,66 @@ static void pl011_set_read_trigger(PL011State *s)
 s->read_trigger = 1;
 }
 
+static gboolean pl011_xmit(GIOChannel *chan, GIOCondition cond, void *opaque)
+{
+PL011State *s = (PL011State *)opaque;
+int ret;
+
+/* instant drain the fifo when there's no back-end */
+if (!qemu_chr_fe_backend_connected(>chr)) {
+s->write_count = 0;
+return FALSE;
+}
+
+if (!s->write_count) {
+return FALSE;
+}
+
+ret = qemu_chr_fe_write(>chr, s->write_fifo, s->write_count);
+if (ret > 0) {
+s->write_count -= ret;
+memmove(s->write_fifo, s->write_fifo + ret, s->write_count);
+s->flags &= ~PL011_FLAG_TXFF;
+if (!s->write_count) {
+s->flags |= PL011_FLAG_TXFE;
+}
+}
+
+if (s->write_count) {
+guint r = qemu_chr_fe_add_watch(>chr, G_IO_OUT | G_IO_HUP,
+pl011_xmit, s);
+if (!r) {
+s->write_count = 0;
+s->flags &= ~PL011_FLAG_TXFF;
+s->flags |= PL011_FLAG_TXFE;
+return FALSE;
+}
+}
+
+s->int_level |= PL011_INT_TX;
+pl011_update(s);
+return FALSE;
+}
+
+static void pl011_write_fifo(void *opaque, const unsigned char *buf, int size)
+{
+PL011State *s = (PL011State *)opaque;
+int depth = (s->lcr & 0x10) ? 16 : 1;
+
+if (size >= (depth - s->write_count)) {
+size = depth - s->write_count;
+s->flags |= PL011_FLAG_TXFF;
+}
+
+if (size > 0) {
+memcpy(s->write_fifo + s->write_count, buf, size);
+s->write_count += size;
+s->flags &= ~PL011_FLAG_TXFE;
+}
+
+pl011_xmit(NULL, G_IO_OUT, s);
+}
+
 static void pl011_write(void *opaque, hwaddr offset,
 uint64_t value, unsigned size)
 {
@@ -179,13 +239,8 @@ static void pl011_write(void *opaque, hwaddr offset,
 
 switch (offset >> 2) {
 case 0: /* UARTDR */
-/* ??? Check if transmitter is enabled.  */
 ch = value;
-/* XXX this blocks entire thread. Rewrite to use
- * qemu_chr_fe_write and background I/O callbacks */
-qemu_chr_fe_write_all(>chr, , 1);
-s->int_level |= PL011_INT_TX;
-pl011_update(s);
+pl011_write_fifo(opaque, , 1);
 break;
 case 1: /* UARTRSR/UARTECR */
 s->rsr = 0;
@@ -207,6 +262,7 @@ static void pl011_write(void *opaque, hwaddr offset,
 if ((s->lcr ^ value) & 0x10) {
 s->read_count = 0;
 s->read_pos = 0;
+s->write_count = 0;
 }
 s->lcr = value;
 pl011_set_read_trigger(s);
@@ -306,6 +362,7 @@ static const VMStateDescription vmstate_pl011 = {
 VMSTATE_UINT32(int_enabled, PL011State),
 VMSTATE_UINT32(int_level, PL011State),
 VMSTATE_UINT32_ARRAY(read_fifo, PL011State, 16),
+VMSTATE_UINT8_ARRAY(write_fifo, PL011State, 16),
 VMSTATE_UINT32(ilpr, PL011State),
 VMSTATE_UINT32(ibrd, PL011State),
 VMSTATE_UINT32(fbrd, PL011State),
@@ -313,6 +370,7 @@ static const VMStateDescription vmstate_pl011 = {
 VMSTATE_INT32(read_pos, PL011State),
 VMSTATE_INT32(read_count, PL011State),
 VMSTATE_INT32(read_trigger, PL011State),
+VMSTATE_INT32(write_count, PL011State),
 VMSTATE_END_OF_LIST()
 }
 };
diff --git a/include/hw/char/pl011.h b/include/hw/char/pl011.h
index 14187165c6..aeaf332eca 100644
--- a/include/hw/char/pl011.h
+++ b/include/hw/char/pl011.h
@@ -38,6 +38,7 @@ typedef struct PL011State {
 uint32_t int_enabled;
 uint32_t int_level;
 uint32_t read_fifo[16];
+uint8_t  write_fifo[16];
 uint32_t ilpr;
 uint32_t ibrd;
 uint32_t fbrd;
@@ -45,6 +46,7 @@ typedef struct PL011State {
 int read_pos;
 int read_count;
 int read_trigger;
+int write_count;
 CharBackend chr;
 qemu_irq irq[6];
 const unsigned char *id;
-- 
2.23.0




Re: [PATCH] hw/char/pl011: Output characters using best-effort mode

2020-02-20 Thread Gavin Shan

Hi Peter and Marc,

On 2/20/20 9:10 PM, Peter Maydell wrote:

On Thu, 20 Feb 2020 at 09:10, Marc Zyngier  wrote:

On 2020-02-20 06:01, Gavin Shan wrote:

This fixes the issue by using newly added API
qemu_chr_fe_try_write_all(),
which provides another type of service (best-effort). It's different
from
qemu_chr_fe_write_all() as the data will be dropped if the backend has
been running into so-called broken state or 50 attempts of
transmissions.
The broken state is cleared if the data is transmitted at once.


I don't think dropping the serial port output is an acceptable outcome.


Agreed. The correct fix for this is the one cryptically described
in the XXX comment this patch deletes:

-/* XXX this blocks entire thread. Rewrite to use
- * qemu_chr_fe_write and background I/O callbacks */

The idea is that essentially we end up emulating the real
hardware's transmit FIFO:
  * as data arrives from the guest we put it in the FIFO
  * we try to send the data with qemu_chr_fe_write(), which does
not block
  * if qemu_chr_fe_write() tells us it did not send all the data,
we use qemu_chr_fe_add_watch() to set up an I/O callback
which will get called when the output chardev has drained
enough that we can try again
  * we make sure all the guest visible registers and mechanisms
for tracking tx fifo level (status bits, interrupts, etc) are
correctly wired up

Then we don't lose data or block QEMU if the guest sends
faster than the chardev backend can handle, assuming the
guest is well-behaved -- just as with a real hardware slow
serial port, the guest will fill the tx fifo and then either poll
or wait for an interrupt telling it that the fifo has drained
before it tries to send more data.

There is an example of this in hw/char/cadence_uart.c
(and an example of how it works for a UART with no tx
fifo in hw/char/cmsdk-apb-uart.c, which is basically the
same except the 'fifo' is just one byte.)

You will also find an awful lot of XXX comments like the
above one in various UART models in hw/char, because
converting an old-style simple blocking UART implementation
to a non-blocking one is a bit fiddly and needs knowledge
of the specifics of the UART behaviour.

The other approach here would be that we could add
options to relevant chardev backends so the user
could say "if you couldn't connect to the tcp server I
specified, throw away data rather than waiting", where
we don't have suitable options already. If the user specifically
tells us they're ok to throw away the serial data, then it's
fine to throw away the serial data :-)



I intended to convince Marc, with an example, that it's fine to lose data
if the serial connection is broken. Now I'm using the same example to try
to convince both of you: Let's assume we have an ARM board and the UART
(RS232) cable is unplugged and plugged back in while the system is
booting. I think some output would be lost. We're emulating pl011 and I
think it should have the same behavior. However, I'm not sure
if it makes sense :)

Peter, I don't think qemu_chr_fe_add_watch() can help with the issue of
blocking the system from booting. I have code that uses qemu_chr_fe_add_watch()
in the pl011 driver, as the attachment shows. The attached patch will be posted
for review shortly, as I think it's valuable to support a 16-character-deep
TxFIFO. The Linux guest can't boot successfully if I have some code to strike
the early console. The serial port is backed by a TCP connection (127.0.0.1:50900)
and the server side doesn't receive the incoming messages, as before. The root
cause is that the guest kernel is held up until the TxFIFO has available space. On the
other hand, QEMU can't send the characters in the TxFIFO to the backend
successfully, which means the TxFIFO is always full.

For the guest kernel, linux/drivers/tty/serial/amba-pl011.c::pl011_putc() is
used to write outgoing characters to TxFIFO. The transmission can't be finished
if there is no space in TxFIFO, indicated by UART01x_FR_TXFF.

   static void pl011_putc(struct uart_port *port, int c)
   {
   while (readl(port->membase + UART01x_FR) & UART01x_FR_TXFF)
   cpu_relax();
   if (port->iotype == UPIO_MEM32)
   writel(c, port->membase + UART01x_DR);
   else
   writeb(c, port->membase + UART01x_DR);
   while (readl(port->membase + UART01x_FR) & UART01x_FR_BUSY)
   cpu_relax();
   }

If the above analysis is correct and the first approach doesn't work out, we have to
consider the 2nd approach - adding an option to the backend to allow losing data. I'm
going to add an "allow-data-lost" option for TYPE_CHARDEV_SOCKET. With the option,
a back-off algorithm is used in tcp_chr_write(): The channel is considered broken if
it fails to transmit data 5 times in a row. Transmission is still
attempted while the channel is in the broken state, and the channel returns to the
normal state as soon as one transmission succeeds.

Thanks,
Gavin




>From 

Re: [PULL SUBSYSTEM qemu-pseries] pseries: Update SLOF firmware image

2020-02-20 Thread Alexey Kardashevskiy



On 18/02/2020 16:48, Philippe Mathieu-Daudé wrote:
> On 2/17/20 11:46 PM, David Gibson wrote:
>> On Mon, Feb 17, 2020 at 11:24:11AM +0100, Philippe Mathieu-Daudé wrote:
>>> On 2/17/20 10:26 AM, Philippe Mathieu-Daudé wrote:
 Hi Alexey,

 On 2/17/20 3:12 AM, Alexey Kardashevskiy wrote:
> The following changes since commit
> 05943fb4ca41f626078014c0327781815c6584c5:
>
>     ppc: free 'fdt' after reset the machine (2020-02-17 11:27:23
> +1100)
>
> are available in the Git repository at:
>
>     g...@github.com:aik/qemu.git tags/qemu-slof-20200217
>
> for you to fetch changes up to
> ea9a03e5aa023c5391bab5259898475d0298aac2:
>
>     pseries: Update SLOF firmware image (2020-02-17 13:08:59 +1100)
>
> 
> Alexey Kardashevskiy (1):
>     pseries: Update SLOF firmware image
>
>    pc-bios/README   |   2 +-
>    pc-bios/slof.bin | Bin 931032 -> 968560 bytes
>    roms/SLOF    |   2 +-
>    3 files changed, 2 insertions(+), 2 deletions(-)

 I only received the cover, not the patch, have you posted it?
>>>
>>> OK I see the SLOF binary is almost 1MB. Maybe this got blocked by spam
>>> filter. FYI you can use 'git-format-patch --no-binary' to emit the patch
>>> with the commit description but without the content.
>>
>> Generally Alexey sends SLOF updates to me just as pull requests
>> without patches in full, because a huge slab of base64 encoded
>> firmware isn't particularly illuminating.
> 
> I understand, this is why I later suggested Alexey to use
> 'git-format-patch --no-binary', because Laszlo uses it for EDK2
> submodule, this allow to quickly review the change on the list (without
> posting the base64), see:
> 
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg624429.html
> (pull-request cover)
> 
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg624432.html
> "roms/edk2: update submodule"
> 
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg624435.html
> "pc-bios: refresh edk2 build artifacts"

I am not quite sure where to fit this "git-format-patch". I run now "git
request-pull" and "git send-email" so am I expected to run format-patch
and post it as a patchset but with the pull request mail as a cover
letter? This does not seem very useful though. For today, I'll add the
change log to the pull request mail. Thanks,



-- 
Alexey



Re: RFC: Split EPT huge pages in advance of dirty logging

2020-02-20 Thread Ben Gardon
On Thu, Feb 20, 2020 at 5:53 AM Zhoujian (jay)  wrote:
>
>
>
> > -Original Message-
> > From: Peter Xu [mailto:pet...@redhat.com]
> > Sent: Thursday, February 20, 2020 1:19 AM
> > To: Zhoujian (jay) 
> > Cc: k...@vger.kernel.org; qemu-devel@nongnu.org; pbonz...@redhat.com;
> > dgilb...@redhat.com; quint...@redhat.com; Liujinsong (Paul)
> > ; linfeng (M) ; wangxin (U)
> > ; Huangweidong (C)
> > 
> > Subject: Re: RFC: Split EPT huge pages in advance of dirty logging
> >
> > On Wed, Feb 19, 2020 at 01:19:08PM +, Zhoujian (jay) wrote:
> > > Hi Peter,
> > >
> > > > -Original Message-
> > > > From: Peter Xu [mailto:pet...@redhat.com]
> > > > Sent: Wednesday, February 19, 2020 1:43 AM
> > > > To: Zhoujian (jay) 
> > > > Cc: k...@vger.kernel.org; qemu-devel@nongnu.org;
> > pbonz...@redhat.com;
> > > > dgilb...@redhat.com; quint...@redhat.com; Liujinsong (Paul)
> > > > ; linfeng (M) ;
> > > > wangxin (U) ; Huangweidong (C)
> > > > 
> > > > Subject: Re: RFC: Split EPT huge pages in advance of dirty logging
> > > >
> > > > On Tue, Feb 18, 2020 at 01:13:47PM +, Zhoujian (jay) wrote:
> > > > > Hi all,
> > > > >
> > > > > We found that the guest will be soft-lockup occasionally when live
> > > > > migrating a 60 vCPU, 512GiB huge page and memory sensitive VM. The
> > > > > reason is clear, almost all of the vCPUs are waiting for the KVM
> > > > > MMU spin-lock to create 4K SPTEs when the huge pages are write
> > > > > protected. This
> > > > phenomenon is also described in this patch set:
> > > > > https://patchwork.kernel.org/cover/11163459/
> > > > > which aims to handle page faults in parallel more efficiently.
> > > > >
> > > > > Our idea is to use the migration thread to touch all of the guest
> > > > > memory in the granularity of 4K before enabling dirty logging. To
> > > > > be more specific, we split all the PDPE_LEVEL SPTEs into
> > > > > DIRECTORY_LEVEL SPTEs as the first step, and then split all the
> > > > > DIRECTORY_LEVEL SPTEs into
> > > > PAGE_TABLE_LEVEL SPTEs as the following step.
> > > >
> > > > IIUC, QEMU will prefer to use huge pages for all the anonymous
> > > > ramblocks (please refer to ram_block_add):
> > > >
> > > > qemu_madvise(new_block->host, new_block->max_length,
> > > > QEMU_MADV_HUGEPAGE);
> > >
> > > Yes, you're right
> > >
> > > >
> > > > Another alternative I can think of is to add an extra parameter to
> > > > QEMU to explicitly disable huge pages (so that can even be
> > > > MADV_NOHUGEPAGE instead of MADV_HUGEPAGE).  However that
> > should also
> > > > drag down the performance for the whole lifecycle of the VM.
> > >
> > > From the performance point of view, it is better to keep the huge
> > > pages when the VM is not in the live migration state.
> > >
> > > > A 3rd option is to make a QMP
> > > > command to dynamically turn huge pages on/off for ramblocks globally.
> > >
> > > We're searching a dynamic method too.
> > > We plan to add two new flags for each memory slot, say
> > > KVM_MEM_FORCE_PT_DIRECTORY_PAGES and
> > > KVM_MEM_FORCE_PT_PAGE_TABLE_PAGES. These flags can be set through
> > > KVM_SET_USER_MEMORY_REGION ioctl.
> > >
> > > The mapping_level which is called by tdp_page_fault in the kernel side
> > > will return PT_DIRECTORY_LEVEL if the
> > KVM_MEM_FORCE_PT_DIRECTORY_PAGES
> > > flag of the memory slot is set, and return PT_PAGE_TABLE_LEVEL if the
> > > KVM_MEM_FORCE_PT_PAGE_TABLE_PAGES flag is set.
> > >
> > > The key steps to split the huge pages in advance of enabling dirty log
> > > is as follows:
> > > 1. The migration thread in user space uses
> > KVM_SET_USER_MEMORY_REGION
> > > ioctl to set the KVM_MEM_FORCE_PT_DIRECTORY_PAGES flag for each
> > memory
> > > slot.
> > > 2. The migration thread continues to use the KVM_SPLIT_HUGE_PAGES
> > > ioctl (which is newly added) to do the splitting of large pages in the
> > > kernel side.
> > > 3. A new vCPU is created temporarily (do some initialization but will
> > > not
> > > run) to help to do the work, i.e. as the parameter of the tdp_page_fault.
> > > 4. Collect the GPA ranges of all the memory slots with the
> > > KVM_MEM_FORCE_PT_DIRECTORY_PAGES flag set.
> > > 5. Split the 1G huge pages(collected in step 4) into 2M by calling
> > > tdp_page_fault, since the mapping_level will return
> > > PT_DIRECTORY_LEVEL. Here is the main difference from the usual path
> > > which is caused by the Guest side(EPT violation/misconfig etc), we
> > > call it directly in the hypervisor side.
> > > 6. Do some cleanups, i.e. free the vCPU related resources 7. The
> > > KVM_SPLIT_HUGE_PAGES ioctl returned to the user space side.
> > > 8. Using KVM_MEM_FORCE_PT_PAGE_TABLE_PAGES instead of
> > > KVM_MEM_FORCE_PT_DIRECTORY_PAGES to repeat step 1 ~ step 7, in step
> > 5
> > > the 2M huge pages will be split into 4K pages.
> > > 9. Clear the KVM_MEM_FORCE_PT_DIRECTORY_PAGES and
> > > KVM_MEM_FORCE_PT_PAGE_TABLE_PAGES flags for each memory slot.
> > > 10. Then the migration thread calls the log_start 

Re: RFC: Split EPT huge pages in advance of dirty logging

2020-02-20 Thread Ben Gardon
FWIW, we currently do this eager splitting at Google for live
migration. When the log-dirty-memory flag is set on a memslot we
eagerly split all pages in the slot down to 4k granularity.
As Jay said, this does not cause crippling lock contention because the
vCPU page faults generated by write protection / splitting can be
resolved in the fast page fault path without acquiring the MMU lock.
I believe +Junaid Shahid tried to upstream this approach at some point
in the past, but the patch set didn't make it in. (This was before my
time, so I'm hoping he has a link.)
I haven't done the analysis to know if eager splitting is more or less
efficient with parallel slow-path page faults, but it's definitely
faster under the MMU lock.

On Thu, Feb 20, 2020 at 5:53 AM Zhoujian (jay)  wrote:
>
>
>
> > -Original Message-
> > From: Peter Xu [mailto:pet...@redhat.com]
> > Sent: Thursday, February 20, 2020 1:19 AM
> > To: Zhoujian (jay) 
> > Cc: k...@vger.kernel.org; qemu-devel@nongnu.org; pbonz...@redhat.com;
> > dgilb...@redhat.com; quint...@redhat.com; Liujinsong (Paul)
> > ; linfeng (M) ; wangxin (U)
> > ; Huangweidong (C)
> > 
> > Subject: Re: RFC: Split EPT huge pages in advance of dirty logging
> >
> > On Wed, Feb 19, 2020 at 01:19:08PM +, Zhoujian (jay) wrote:
> > > Hi Peter,
> > >
> > > > -Original Message-
> > > > From: Peter Xu [mailto:pet...@redhat.com]
> > > > Sent: Wednesday, February 19, 2020 1:43 AM
> > > > To: Zhoujian (jay) 
> > > > Cc: k...@vger.kernel.org; qemu-devel@nongnu.org;
> > pbonz...@redhat.com;
> > > > dgilb...@redhat.com; quint...@redhat.com; Liujinsong (Paul)
> > > > ; linfeng (M) ;
> > > > wangxin (U) ; Huangweidong (C)
> > > > 
> > > > Subject: Re: RFC: Split EPT huge pages in advance of dirty logging
> > > >
> > > > On Tue, Feb 18, 2020 at 01:13:47PM +, Zhoujian (jay) wrote:
> > > > > Hi all,
> > > > >
> > > > > We found that the guest will be soft-lockup occasionally when live
> > > > > migrating a 60 vCPU, 512GiB huge page and memory sensitive VM. The
> > > > > reason is clear, almost all of the vCPUs are waiting for the KVM
> > > > > MMU spin-lock to create 4K SPTEs when the huge pages are write
> > > > > protected. This
> > > > phenomenon is also described in this patch set:
> > > > > https://patchwork.kernel.org/cover/11163459/
> > > > > which aims to handle page faults in parallel more efficiently.
> > > > >
> > > > > Our idea is to use the migration thread to touch all of the guest
> > > > > memory in the granularity of 4K before enabling dirty logging. To
> > > > > be more specific, we split all the PDPE_LEVEL SPTEs into
> > > > > DIRECTORY_LEVEL SPTEs as the first step, and then split all the
> > > > > DIRECTORY_LEVEL SPTEs into
> > > > PAGE_TABLE_LEVEL SPTEs as the following step.
> > > >
> > > > IIUC, QEMU will prefer to use huge pages for all the anonymous
> > > > ramblocks (please refer to ram_block_add):
> > > >
> > > > qemu_madvise(new_block->host, new_block->max_length,
> > > > QEMU_MADV_HUGEPAGE);
> > >
> > > Yes, you're right
> > >
> > > >
> > > > Another alternative I can think of is to add an extra parameter to
> > > > QEMU to explicitly disable huge pages (so that can even be
> > > > MADV_NOHUGEPAGE instead of MADV_HUGEPAGE).  However that
> > should also
> > > > drag down the performance for the whole lifecycle of the VM.
> > >
> > > From the performance point of view, it is better to keep the huge
> > > pages when the VM is not in the live migration state.
> > >
> > > > A 3rd option is to make a QMP
> > > > command to dynamically turn huge pages on/off for ramblocks globally.
> > >
> > > We're searching a dynamic method too.
> > > We plan to add two new flags for each memory slot, say
> > > KVM_MEM_FORCE_PT_DIRECTORY_PAGES and
> > > KVM_MEM_FORCE_PT_PAGE_TABLE_PAGES. These flags can be set through
> > > KVM_SET_USER_MEMORY_REGION ioctl.
> > >
> > > The mapping_level which is called by tdp_page_fault in the kernel side
> > > will return PT_DIRECTORY_LEVEL if the
> > KVM_MEM_FORCE_PT_DIRECTORY_PAGES
> > > flag of the memory slot is set, and return PT_PAGE_TABLE_LEVEL if the
> > > KVM_MEM_FORCE_PT_PAGE_TABLE_PAGES flag is set.
> > >
> > > The key steps to split the huge pages in advance of enabling dirty log
> > > is as follows:
> > > 1. The migration thread in user space uses
> > KVM_SET_USER_MEMORY_REGION
> > > ioctl to set the KVM_MEM_FORCE_PT_DIRECTORY_PAGES flag for each
> > memory
> > > slot.
> > > 2. The migration thread continues to use the KVM_SPLIT_HUGE_PAGES
> > > ioctl (which is newly added) to do the splitting of large pages in the
> > > kernel side.
> > > 3. A new vCPU is created temporarily (do some initialization but will
> > > not
> > > run) to help to do the work, i.e. as the parameter of the tdp_page_fault.
> > > 4. Collect the GPA ranges of all the memory slots with the
> > > KVM_MEM_FORCE_PT_DIRECTORY_PAGES flag set.
> > > 5. Split the 1G huge pages(collected in step 4) into 2M by calling
> > > tdp_page_fault, since 

Re: [PULL SUBSYSTEM qemu-pseries] pseries: Update SLOF firmware image

2020-02-20 Thread Alexey Kardashevskiy
Uff. Forgot to add that this replaces the update tagged as
qemu-slof-20200217; the difference from that is that this fixes handling of
64bit ext4 and removes

"disk-label: Try ext2 filesystem when booting from GPT partition"

as it did not help with what I wanted to achieve (zImage loading into
slof from ext4 - there are still problems) and it broke things elsewhere.


On 21/02/2020 14:52, Alexey Kardashevskiy wrote:
> The following changes since commit 438bafcac55308eef4f9029c94dbadd2c7ac3bb7:
> 
>   hw/ppc/virtex_ml507:fix leak of fdevice tree blob (2020-02-21 09:15:04 
> +1100)
> 
> are available in the Git repository at:
> 
>   g...@github.com:aik/qemu.git tags/qemu-slof-20200221
> 
> for you to fetch changes up to fcd350cb4646255845f643cdb1711bb7004038e7:
> 
>   pseries: Update SLOF firmware image (2020-02-21 14:20:23 +1100)
> 
> 
> Alexey Kardashevskiy (1):
>   pseries: Update SLOF firmware image
> 
>  pc-bios/README   |   2 +-
>  pc-bios/slof.bin | Bin 931032 -> 968616 bytes
>  roms/SLOF|   2 +-
>  3 files changed, 2 insertions(+), 2 deletions(-)
> 
> 
> *** Note: this is not for master, this is for pseries
> 
> The full changelog is:
> 
> Alexey Kardashevskiy (10):
>   disk-label: Prepare for extenting
>   disk-label: Support Linux GPT partition type
>   ext2: Prepare for extending
>   ext2: Rename group-desc-size
>   ext2: Read size of group descriptors
>   ext2: Read all 64bit of inode number
>   ext2/4: Add basic extent tree support
>   elf64: Add LE64 ABIv1/2 support for loading images to given address
>   fdt: Fix creating new nodes at H_CAS
>   version: update to 20200221
> 
> Greg Kurz (2):
>   fdt: Fix update of "interrupt-controller" node at CAS
>   fdt: Delete nodes of devices removed between boot and CAS
> 
> Stefan Berger (8):
>   slof: Implement SLOF_get_keystroke() and SLOF_reset()
>   slof: Make linker script variables accessible
>   qemu: Make print_version variable accessible
>   tpm: Add TPM CRQ driver implementation
>   tpm: Add sha256 implementation
>   tcgbios: Add TPM 2.0 support and firmware API
>   tcgbios: Implement menu to clear TPM 2 and activate its PCR banks
>   tcgbios: Measure the GPT table
> 

-- 
Alexey



[PULL SUBSYSTEM qemu-pseries] pseries: Update SLOF firmware image

2020-02-20 Thread Alexey Kardashevskiy
The following changes since commit 438bafcac55308eef4f9029c94dbadd2c7ac3bb7:

  hw/ppc/virtex_ml507:fix leak of fdevice tree blob (2020-02-21 09:15:04 +1100)

are available in the Git repository at:

  g...@github.com:aik/qemu.git tags/qemu-slof-20200221

for you to fetch changes up to fcd350cb4646255845f643cdb1711bb7004038e7:

  pseries: Update SLOF firmware image (2020-02-21 14:20:23 +1100)


Alexey Kardashevskiy (1):
  pseries: Update SLOF firmware image

 pc-bios/README   |   2 +-
 pc-bios/slof.bin | Bin 931032 -> 968616 bytes
 roms/SLOF|   2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)


*** Note: this is not for master, this is for pseries

The full changelog is:

Alexey Kardashevskiy (10):
  disk-label: Prepare for extenting
  disk-label: Support Linux GPT partition type
  ext2: Prepare for extending
  ext2: Rename group-desc-size
  ext2: Read size of group descriptors
  ext2: Read all 64bit of inode number
  ext2/4: Add basic extent tree support
  elf64: Add LE64 ABIv1/2 support for loading images to given address
  fdt: Fix creating new nodes at H_CAS
  version: update to 20200221

Greg Kurz (2):
  fdt: Fix update of "interrupt-controller" node at CAS
  fdt: Delete nodes of devices removed between boot and CAS

Stefan Berger (8):
  slof: Implement SLOF_get_keystroke() and SLOF_reset()
  slof: Make linker script variables accessible
  qemu: Make print_version variable accessible
  tpm: Add TPM CRQ driver implementation
  tpm: Add sha256 implementation
  tcgbios: Add TPM 2.0 support and firmware API
  tcgbios: Implement menu to clear TPM 2 and activate its PCR banks
  tcgbios: Measure the GPT table



[PATCH] target: i386: Check float overflow about register stack

2020-02-20 Thread chengang
From: Chen Gang 

The fxam instruction also checks the register stack overflow, which can
be obtained by the following fstsw instruction. The related code is below; it
works well on real x86_64 hardware, but does not work under qemu-i386.

0006b63c <_CIsqrt>:
   6b63c:   55  push   %ebp
   6b63d:   89 e5   mov%esp,%ebp
   6b63f:   83 ec 44sub$0x44,%esp
   6b642:   dd 1c 24fstpl  (%esp)
   6b645:   9b  fwait
   6b646:   e8 d5 04 00 00  call   6bb20 
   6b64b:   b9 01 00 00 00  mov$0x1,%ecx
   6b650:   d9 e5   fxam
   6b652:   9b df e0fstsw  %ax
   6b655:   66 25 00 45 and$0x4500,%ax
   6b659:   66 3d 00 41 cmp$0x4100,%ax
   6b65d:   74 07   je 6b666 <_CIsqrt+0x2a>
   6b65f:   dd 1c ccfstpl  (%esp,%ecx,8)
   6b662:   9b  fwait
   6b663:   41  inc%ecx
   6b664:   eb ea   jmp6b650 <_CIsqrt+0x14>
   6b666:   89 4d fcmov%ecx,-0x4(%ebp)
   6b669:   e8 b2 0f 00 00  call   6c620 
   6b66e:   8b 4d fcmov-0x4(%ebp),%ecx
   6b671:   dd 1c 24fstpl  (%esp)
   6b674:   49  dec%ecx
   6b675:   dd 04 ccfldl   (%esp,%ecx,8)
   6b678:   83 f9 00cmp$0x0,%ecx
   6b67b:   75 f7   jne6b674 <_CIsqrt+0x38>
   6b67d:   c9  leave
   6b67e:   c3  ret
   6b67f:   90  nop

Signed-off-by: Chen Gang 
---
 target/i386/cpu.h|  1 +
 target/i386/fpu_helper.c | 70 
 2 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 576f309bbf..3e2b719ab7 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1394,6 +1394,7 @@ typedef struct CPUX86State {
 struct {} start_init_save;
 
 /* FPU state */
+bool foverflow;
 unsigned int fpstt; /* top of stack index */
 uint16_t fpus;
 uint16_t fpuc;
diff --git a/target/i386/fpu_helper.c b/target/i386/fpu_helper.c
index 99f28f267f..81f3cefe8b 100644
--- a/target/i386/fpu_helper.c
+++ b/target/i386/fpu_helper.c
@@ -91,17 +91,31 @@ void cpu_set_ignne(void)
 }
 #endif
 
+static inline void set_fpstt(CPUX86State *env, unsigned int fpstt,
+ bool pop, bool full)
+{
+env->foverflow = (fpstt > 7) && full; /* clear the original flag */
+if (pop) {
+if (full) {
+env->fptags[env->fpstt] = 1; /* invalidate stack entry */
+}
+env->fpstt = fpstt & 7;
+} else {
+env->fpstt = fpstt & 7;
+if (full) {
+env->fptags[env->fpstt] = 0; /* validate stack entry */
+}
+}
+}
 
 static inline void fpush(CPUX86State *env)
 {
-env->fpstt = (env->fpstt - 1) & 7;
-env->fptags[env->fpstt] = 0; /* validate stack entry */
+set_fpstt(env, env->fpstt - 1, false, true);
 }
 
 static inline void fpop(CPUX86State *env)
 {
-env->fptags[env->fpstt] = 1; /* invalidate stack entry */
-env->fpstt = (env->fpstt + 1) & 7;
+set_fpstt(env, env->fpstt + 1, true, true);
 }
 
 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
@@ -211,11 +225,10 @@ void helper_flds_ST0(CPUX86State *env, uint32_t val)
 uint32_t i;
 } u;
 
-new_fpstt = (env->fpstt - 1) & 7;
+new_fpstt = env->fpstt - 1;
 u.i = val;
-env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, >fp_status);
-env->fpstt = new_fpstt;
-env->fptags[new_fpstt] = 0; /* validate stack entry */
+env->fpregs[new_fpstt & 7].d = float32_to_floatx80(u.f, >fp_status);
+set_fpstt(env, new_fpstt, false, true);
 }
 
 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
@@ -226,31 +239,28 @@ void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 uint64_t i;
 } u;
 
-new_fpstt = (env->fpstt - 1) & 7;
+new_fpstt = env->fpstt - 1;
 u.i = val;
-env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, >fp_status);
-env->fpstt = new_fpstt;
-env->fptags[new_fpstt] = 0; /* validate stack entry */
+env->fpregs[new_fpstt & 7].d = float64_to_floatx80(u.f, >fp_status);
+set_fpstt(env, new_fpstt, false, true);
 }
 
 void helper_fildl_ST0(CPUX86State *env, int32_t val)
 {
 int new_fpstt;
 
-new_fpstt = (env->fpstt - 1) & 7;
-env->fpregs[new_fpstt].d = int32_to_floatx80(val, >fp_status);
-env->fpstt = new_fpstt;
-env->fptags[new_fpstt] = 0; /* validate stack entry */
+new_fpstt = env->fpstt - 1;
+env->fpregs[new_fpstt & 7].d = int32_to_floatx80(val, >fp_status);
+set_fpstt(env, new_fpstt, false, true);
 }
 
 void helper_fildll_ST0(CPUX86State *env, int64_t val)
 {
  

Re: [PATCH v2 2/2] hw: move timer_new from init() into realize() to avoid memleaks

2020-02-20 Thread Pan Nengyuan



On 2/21/2020 1:56 AM, Peter Maydell wrote:
> On Mon, 17 Feb 2020 at 03:22,  wrote:
>>
>> From: Pan Nengyuan 
>>
>> There are some memleaks when we call 'device_list_properties'. This patch 
>> move timer_new from init into realize to fix it.
>> Meanwhile, do the null check in mos6522_reset() to avoid null deref if we 
>> move timer_new into realize().
>>
>> Reported-by: Euler Robot 
>> Signed-off-by: Pan Nengyuan 
>> Reviewed-by: Philippe Mathieu-Daudé 
> 
> 
>> diff --git a/hw/misc/mos6522.c b/hw/misc/mos6522.c
>> index 19e154b870..980eda7599 100644
>> --- a/hw/misc/mos6522.c
>> +++ b/hw/misc/mos6522.c
>> @@ -465,11 +465,15 @@ static void mos6522_reset(DeviceState *dev)
>>  s->timers[0].frequency = s->frequency;
>>  s->timers[0].latch = 0x;
>>  set_counter(s, >timers[0], 0x);
>> -timer_del(s->timers[0].timer);
>> +if (s->timers[0].timer) {
>> +timer_del(s->timers[0].timer);
>> +}
>>
>>  s->timers[1].frequency = s->frequency;
>>  s->timers[1].latch = 0x;
>> -timer_del(s->timers[1].timer);
>> +if (s->timers[1].timer) {
>> +timer_del(s->timers[1].timer);
>> +}
>>  }
> 
> What code path calls a device 'reset' method on a device
> that has not yet been realized ? I wasn't expecting that
> to be valid...

I got the following null-deref case on m68k if I move timer_new into realize():

#0 0x55cbb0d3e9f9 in timer_del /mnt/sdb/qemu-new/qemu/util/qemu-timer.c:429
#1 0x55cbb04f3abe in mos6522_reset 
/mnt/sdb/qemu-new/qemu/hw/misc/mos6522.c:468
#2 0x55cbb02b5fd5 in mos6522_q800_via2_reset 
/mnt/sdb/qemu-new/qemu/hw/misc/mac_via.c:1098
#3 0x55cbb047b926 in device_transitional_reset 
/mnt/sdb/qemu-new/qemu/hw/core/qdev.c:1136
#4 0x55cbb0491a71 in resettable_phase_hold 
/mnt/sdb/qemu-new/qemu/hw/core/resettable.c:182
#5 0x55cbb048700e in bus_reset_child_foreach 
/mnt/sdb/qemu-new/qemu/hw/core/bus.c:94
#6 0x55cbb0490f66 in resettable_child_foreach 
/mnt/sdb/qemu-new/qemu/hw/core/resettable.c:96
#7 0x55cbb0491896 in resettable_phase_hold 
/mnt/sdb/qemu-new/qemu/hw/core/resettable.c:173
#8 0x55cbb0490c06 in resettable_assert_reset 
/mnt/sdb/qemu-new/qemu/hw/core/resettable.c:60
#9 0x55cbb0490aec in resettable_reset 
/mnt/sdb/qemu-new/qemu/hw/core/resettable.c:45
#10 0x55cbb0492668 in resettable_cold_reset_fn 
/mnt/sdb/qemu-new/qemu/hw/core/resettable.c:269
#11 0x55cbb0494a04 in qemu_devices_reset 
/mnt/sdb/qemu-new/qemu/hw/core/reset.c:69
#12 0x55cbb03ab91d in qemu_system_reset /mnt/sdb/qemu-new/qemu/vl.c:1412
#13 0x55cbb03bfe04 in main /mnt/sdb/qemu-new/qemu/vl.c:4403

mos6522_init was called in mac_via_realize as follows, but mos6522_realize was
not called at all.
So maybe we shouldn't move it into realize, or should we add a realize step in
this code path?

#0  0x55789e40 in mos6522_init (obj=0x57537b00) at 
/mnt/sdb/qemu-new/qemu/hw/misc/mos6522.c:476
#1  0x5581b6c3 in object_init_with_type (obj=0x57537b00, 
ti=0x5617c2b0) at /mnt/sdb/qemu-new/qemu/qom/object.c:372
#2  0x5581cc80 in object_initialize_with_type 
(data=data@entry=0x57537b00, size=1504, type=0x5617c2b0) at 
/mnt/sdb/qemu-new/qemu/qom/object.c:516
#3  0x5581cd1f in object_initialize 
(data=data@entry=0x57537b00, size=, typename=) at /mnt/sdb/qemu-new/qemu/qom/object.c:529
#4  0x5581e387 in object_initialize_childv
(parentobj=0x57537510, propname=0x55a3c673 "via1", 
childobj=0x57537b00, size=, type=, 
errp=0x5613b338 , vargs=0x7fffdb30)
at /mnt/sdb/qemu-new/qemu/qom/object.c:552
#5  0x5581e4d3 in object_initialize_child
(parentobj=, propname=, 
childobj=childobj@entry=0x57537b00, size=, type=, errp=) at /mnt/sdb/qemu-new/qemu/qom/object.c:539
#6  0x5577ba88 in sysbus_init_child_obj (parent=, 
childname=, child=0x57537b00, childsize=, 
childtype=)
at /mnt/sdb/qemu-new/qemu/hw/core/sysbus.c:352
#7  0x5570d301 in mac_via_realize (dev=0x57537510, 
errp=0x7fffdce0) at /mnt/sdb/qemu-new/qemu/hw/misc/mac_via.c:876
#8  0x5577 in device_set_realized (obj=0x57537510, 
value=, errp=0x7fffddd0) at 
/mnt/sdb/qemu-new/qemu/hw/core/qdev.c:891
#9  0x5581b266 in property_set_bool (obj=0x57537510, 
v=, name=, opaque=0x56165f50, 
errp=0x7fffddd0) at /mnt/sdb/qemu-new/qemu/qom/object.c:2238
#10 0x5581feee in object_property_set_qobject (obj=0x57537510, 
value=, name=0x55a5fa67 "realized", errp=0x7fffddd0) at 
/mnt/sdb/qemu-new/qemu/qom/qom-qobject.c:26
#11 0x5581d60f in object_property_set_bool (obj=0x57537510, 
value=, name=0x55a5fa67 "realized", errp=0x7fffddd0) at 
/mnt/sdb/qemu-new/qemu/qom/object.c:1390
#12 0x55773381 in qdev_init_nofail (dev=dev@entry=0x57537510) 
at /mnt/sdb/qemu-new/qemu/hw/core/qdev.c:418
#13 0x55711fcd in q800_init (machine=) at 

[PULL 17/20] ppc: free 'fdt' after reset the machine

2020-02-20 Thread David Gibson
From: Pan Nengyuan 

'fdt' is never freed on either e500 or pnv when we call 'system_reset' on ppc;
this patch fixes it. The leak stacks are as follows:

Direct leak of 4194304 byte(s) in 4 object(s) allocated from:
#0 0x7fafe37dd970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970)
#1 0x7fafe2e3149d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d)
#2 0x561876f7f80d in create_device_tree 
/mnt/sdb/qemu-new/qemu/device_tree.c:40
#3 0x561876b7ac29 in ppce500_load_device_tree 
/mnt/sdb/qemu-new/qemu/hw/ppc/e500.c:364
#4 0x561876b7f437 in ppce500_reset_device_tree 
/mnt/sdb/qemu-new/qemu/hw/ppc/e500.c:617
#5 0x56187718b1ae in qemu_devices_reset 
/mnt/sdb/qemu-new/qemu/hw/core/reset.c:69
#6 0x561876f6938d in qemu_system_reset /mnt/sdb/qemu-new/qemu/vl.c:1412
#7 0x561876f6a25b in main_loop_should_exit /mnt/sdb/qemu-new/qemu/vl.c:1645
#8 0x561876f6a398 in main_loop /mnt/sdb/qemu-new/qemu/vl.c:1679
#9 0x561876f7da8e in main /mnt/sdb/qemu-new/qemu/vl.c:4438
#10 0x7fafde16b812 in __libc_start_main ../csu/libc-start.c:308
#11 0x5618765c055d in _start 
(/mnt/sdb/qemu-new/qemu/build/ppc64-softmmu/qemu-system-ppc64+0x2b1555d)

Direct leak of 1048576 byte(s) in 1 object(s) allocated from:
#0 0x7fc0a6f1b970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970)
#1 0x7fc0a656f49d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d)
#2 0x55eb05acd2ca in pnv_dt_create /mnt/sdb/qemu-new/qemu/hw/ppc/pnv.c:507
#3 0x55eb05ace5bf in pnv_reset /mnt/sdb/qemu-new/qemu/hw/ppc/pnv.c:578
#4 0x55eb05f2f395 in qemu_system_reset /mnt/sdb/qemu-new/qemu/vl.c:1410
#5 0x55eb05f43850 in main /mnt/sdb/qemu-new/qemu/vl.c:4403
#6 0x7fc0a18a9812 in __libc_start_main ../csu/libc-start.c:308
#7 0x55eb0558655d in _start 
(/mnt/sdb/qemu-new/qemu/build/ppc64-softmmu/qemu-system-ppc64+0x2b1555d)

Reported-by: Euler Robot 
Signed-off-by: Pan Nengyuan 
Message-Id: <20200214033206.4395-1-pannengy...@huawei.com>
Reviewed-by: Greg Kurz 
Signed-off-by: David Gibson 
---
 hw/ppc/e500.c | 1 +
 hw/ppc/pnv.c  | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 886442e54f..af537bba2b 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -594,6 +594,7 @@ done:
 cpu_physical_memory_write(addr, fdt, fdt_size);
 }
 ret = fdt_size;
+g_free(fdt);
 
 out:
 g_free(pci_map);
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 139c857b1e..e98038b809 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -582,6 +582,8 @@ static void pnv_reset(MachineState *machine)
 
 qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
 cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt));
+
+g_free(fdt);
 }
 
 static ISABus *pnv_chip_power8_isa_create(PnvChip *chip, Error **errp)
-- 
2.24.1




[PULL 19/20] spapr: Fix handling of unplugged devices during CAS and migration

2020-02-20 Thread David Gibson
From: Greg Kurz 

We already detect if a device is being hot plugged before CAS to trigger
a CAS reboot and during migration to migrate the state of the associated
DRC. But hot unplugging a device is also an asynchronous operation that
requires the guest to take action. This means that if the guest is migrated
after the hot unplug event was sent but before it could release the device
with RTAS, the destination QEMU doesn't know about the pending unplug
operation and doesn't actually remove the device when the guest finally
releases it.

Similarly, if the unplug request is fired before CAS, the guest isn't
notified of the change, just like with hotplug. It ends up booting with
the device still present in the DT and configures it, just like it was
never removed. Even weirder, since the event is still queued, it will
be eventually processed when some other unrelated event is posted to
the guest.

Enhance spapr_drc_transient() to also return true if an unplug request is
pending. This fixes the issue at CAS with a CAS reboot request and
causes the DRC state to be migrated. Some extra care is still needed to
inform the destination that an unplug request is pending : migrate the
unplug_requested field of the DRC in an optional subsection. This might
break backwards migration, but this is still better than ending with
an inconsistent guest.

Signed-off-by: Greg Kurz 
Message-Id: <158169248798.3465937.1108351365840514270.st...@bahia.lan>
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_drc.c | 25 +++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index 4c35ce7c5c..e373d342eb 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -456,6 +456,22 @@ void spapr_drc_reset(SpaprDrc *drc)
 }
 }
 
+static bool spapr_drc_unplug_requested_needed(void *opaque)
+{
+return spapr_drc_unplug_requested(opaque);
+}
+
+static const VMStateDescription vmstate_spapr_drc_unplug_requested = {
+.name = "spapr_drc/unplug_requested",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = spapr_drc_unplug_requested_needed,
+.fields  = (VMStateField []) {
+VMSTATE_BOOL(unplug_requested, SpaprDrc),
+VMSTATE_END_OF_LIST()
+}
+};
+
 bool spapr_drc_transient(SpaprDrc *drc)
 {
 SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
@@ -471,9 +487,10 @@ bool spapr_drc_transient(SpaprDrc *drc)
 /*
  * We need to reset the DRC at CAS or to migrate the DRC state if it's
  * not equal to the expected long-term state, which is the same as the
- * coldplugged initial state.
+ * coldplugged initial state, or if an unplug request is pending.
  */
-return (drc->state != drck->ready_state);
+return drc->state != drck->ready_state ||
+spapr_drc_unplug_requested(drc);
 }
 
 static bool spapr_drc_needed(void *opaque)
@@ -489,6 +506,10 @@ static const VMStateDescription vmstate_spapr_drc = {
 .fields  = (VMStateField []) {
 VMSTATE_UINT32(state, SpaprDrc),
 VMSTATE_END_OF_LIST()
+},
+.subsections = (const VMStateDescription * []) {
+_spapr_drc_unplug_requested,
+NULL
 }
 };
 
-- 
2.24.1




[PULL 15/20] target/ppc/cpu.h: Move fpu related members closer in cpu env

2020-02-20 Thread David Gibson
From: BALATON Zoltan 

Move fp_status and fpscr closer to other floating point and vector
related members in cpu env definition so they are in one group.

Signed-off-by: BALATON Zoltan 
Message-Id: 
<5b50e9e7eec2c383ae878b397d0b2927efc9ea43.158134.git.bala...@eik.bme.hu>
Signed-off-by: David Gibson 
---
 target/ppc/cpu.h | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 07dd2b4da7..c3b0a00064 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -997,11 +997,6 @@ struct CPUPPCState {
 /* temporary general purpose registers */
 target_ulong tgpr[4]; /* Used to speed-up TLB assist handlers */
 
-/* Floating point execution context */
-float_status fp_status;
-/* floating point status and control register */
-target_ulong fpscr;
-
 /* Next instruction pointer */
 target_ulong nip;
 
@@ -1060,6 +1055,10 @@ struct CPUPPCState {
  * used simultaneously
  */
 float_status vec_status;
+/* Floating point execution context */
+float_status fp_status;
+/* floating point status and control register */
+target_ulong fpscr;
 
 /* Internal devices resources */
 /* Time base and decrementer */
-- 
2.24.1




[PULL 20/20] hw/ppc/virtex_ml507:fix leak of fdevice tree blob

2020-02-20 Thread David Gibson
From: Chen Qun 

The device tree blob returned by load_device_tree is malloced.
We should free it after cpu_physical_memory_write().

Reported-by: Euler Robot 
Signed-off-by: Chen Qun 
Message-Id: <20200218091154.21696-3-kuhn.chen...@huawei.com>
Signed-off-by: David Gibson 
---
 hw/ppc/virtex_ml507.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c
index 91dd00ee91..4eef70069f 100644
--- a/hw/ppc/virtex_ml507.c
+++ b/hw/ppc/virtex_ml507.c
@@ -188,6 +188,7 @@ static int xilinx_load_device_tree(hwaddr addr,
 if (r < 0)
 fprintf(stderr, "couldn't set /chosen/bootargs\n");
 cpu_physical_memory_write(addr, fdt, fdt_size);
+g_free(fdt);
 return fdt_size;
 }
 
-- 
2.24.1




[PULL 18/20] spapr: Don't use spapr_drc_needed() in CAS code

2020-02-20 Thread David Gibson
From: Greg Kurz 

We currently don't support hotplug of devices between boot and CAS. If
this happens a CAS reboot is triggered. We detect this during CAS using
the spapr_drc_needed() function which is essentially a VMStateDescription
.needed callback. Even if the condition for CAS reboot happens to be the
same as for DRC migration, it looks wrong to piggyback a migration helper
for this.

Introduce a helper with a slightly more explicit name and use it in both CAS
and DRC migration code. Since a subsequent patch will enhance this helper
to cover the case of hot unplug, let's go for spapr_drc_transient(). While
here convert spapr_hotplugged_dev_before_cas() to the "transient" wording as
well.

This doesn't change any behaviour.

Signed-off-by: Greg Kurz 
Message-Id: <158169248180.3465937.9531405453362718771.st...@bahia.lan>
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_drc.c | 20 ++--
 hw/ppc/spapr_hcall.c   | 14 +-
 include/hw/ppc/spapr_drc.h |  4 +++-
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index fc62e04901..4c35ce7c5c 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -456,23 +456,31 @@ void spapr_drc_reset(SpaprDrc *drc)
 }
 }
 
-bool spapr_drc_needed(void *opaque)
+bool spapr_drc_transient(SpaprDrc *drc)
 {
-SpaprDrc *drc = (SpaprDrc *)opaque;
 SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
 
-/* If no dev is plugged in there is no need to migrate the DRC state */
+/*
+ * If no dev is plugged in there is no need to migrate the DRC state
+ * nor to reset the DRC at CAS.
+ */
 if (!drc->dev) {
 return false;
 }
 
 /*
- * We need to migrate the state if it's not equal to the expected
- * long-term state, which is the same as the coldplugged initial
- * state */
+ * We need to reset the DRC at CAS or to migrate the DRC state if it's
+ * not equal to the expected long-term state, which is the same as the
+ * coldplugged initial state.
+ */
 return (drc->state != drck->ready_state);
 }
 
+static bool spapr_drc_needed(void *opaque)
+{
+return spapr_drc_transient(opaque);
+}
+
 static const VMStateDescription vmstate_spapr_drc = {
 .name = "spapr_drc",
 .version_id = 1,
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index b8bb66b5c0..6db3dbde9c 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1640,20 +1640,24 @@ static uint32_t cas_check_pvr(SpaprMachineState *spapr, 
PowerPCCPU *cpu,
 return best_compat;
 }
 
-static bool spapr_hotplugged_dev_before_cas(void)
+static bool spapr_transient_dev_before_cas(void)
 {
-Object *drc_container, *obj;
+Object *drc_container;
 ObjectProperty *prop;
 ObjectPropertyIterator iter;
 
 drc_container = container_get(object_get_root(), "/dr-connector");
 object_property_iter_init(, drc_container);
 while ((prop = object_property_iter_next())) {
+SpaprDrc *drc;
+
 if (!strstart(prop->type, "link<", NULL)) {
 continue;
 }
-obj = object_property_get_link(drc_container, prop->name, NULL);
-if (spapr_drc_needed(obj)) {
+drc = SPAPR_DR_CONNECTOR(object_property_get_link(drc_container,
+  prop->name, NULL));
+
+if (spapr_drc_transient(drc)) {
 return true;
 }
 }
@@ -1830,7 +1834,7 @@ static target_ulong 
h_client_architecture_support(PowerPCCPU *cpu,
 
 spapr_irq_update_active_intc(spapr);
 
-if (spapr_hotplugged_dev_before_cas()) {
+if (spapr_transient_dev_before_cas()) {
 spapr->cas_reboot = true;
 }
 
diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
index df3d958a66..21af8deac1 100644
--- a/include/hw/ppc/spapr_drc.h
+++ b/include/hw/ppc/spapr_drc.h
@@ -278,7 +278,9 @@ int spapr_dt_drc(void *fdt, int offset, Object *owner, 
uint32_t drc_type_mask);
 
 void spapr_drc_attach(SpaprDrc *drc, DeviceState *d, Error **errp);
 void spapr_drc_detach(SpaprDrc *drc);
-bool spapr_drc_needed(void *opaque);
+
+/* Returns true if a hot plug/unplug request is pending */
+bool spapr_drc_transient(SpaprDrc *drc);
 
 static inline bool spapr_drc_unplug_requested(SpaprDrc *drc)
 {
-- 
2.24.1




[PULL 10/20] pnv/phb3: Convert 1u to 1ull

2020-02-20 Thread David Gibson
From: Greg Kurz 

As reported by Coverity defect CID 1419397, the 'j' variable goes up to
63 and shouldn't be used to left shift a 32-bit integer.

The result of the operation goes to a 64-bit integer : use a 64-bit
constant.

Reported-by: Coverity CID 1419397 Bad bit shift operation
Fixes: 9ae1329ee2fe "ppc/pnv: Add models for POWER8 PHB3 PCIe Host bridge"
Signed-off-by: Greg Kurz 
Message-Id: <158153364010.3229002.8004283672455615950.st...@bahia.lan>
Signed-off-by: David Gibson 
---
 hw/pci-host/pnv_phb3_msi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/pci-host/pnv_phb3_msi.c b/hw/pci-host/pnv_phb3_msi.c
index ecfc1b2c4e..d645468f4a 100644
--- a/hw/pci-host/pnv_phb3_msi.c
+++ b/hw/pci-host/pnv_phb3_msi.c
@@ -220,7 +220,7 @@ static void phb3_msi_resend(ICSState *ics)
 if ((msi->rba[i] & (1ull << j)) == 0) {
 continue;
 }
-msi->rba[i] &= ~(1u << j);
+msi->rba[i] &= ~(1ull << j);
 phb3_msi_try_send(msi, i * 64 + j, true);
 }
 }
-- 
2.24.1




[PULL 16/20] target/ppc/cpu.h: Clean up comments in the struct CPUPPCState definition

2020-02-20 Thread David Gibson
From: BALATON Zoltan 

The cpu env struct is quite complex, but the comments that are supposed to
explain it in its definition just make it harder to read. Reformat and reword
some comments to make it clearer and more readable.

Signed-off-by: BALATON Zoltan 
Message-Id: 
<8707144ab1ccf9c5c89a39c2d7a0b02307ca25d4.158134.git.bala...@eik.bme.hu>
Signed-off-by: David Gibson 
---
 target/ppc/cpu.h | 145 ++-
 1 file changed, 54 insertions(+), 91 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index c3b0a00064..b283042515 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -960,116 +960,88 @@ struct ppc_radix_page_info {
 #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
 
 struct CPUPPCState {
-/*
- * First are the most commonly used resources during translated
- * code execution
- */
-/* general purpose registers */
-target_ulong gpr[32];
-/* Storage for GPR MSB, used by the SPE extension */
-target_ulong gprh[32];
-/* LR */
+/* Most commonly used resources during translated code execution first */
+target_ulong gpr[32];  /* general purpose registers */
+target_ulong gprh[32]; /* storage for GPR MSB, used by the SPE extension */
 target_ulong lr;
-/* CTR */
 target_ulong ctr;
-/* condition register */
-uint32_t crf[8];
+uint32_t crf[8];   /* condition register */
 #if defined(TARGET_PPC64)
-/* CFAR */
 target_ulong cfar;
 #endif
-/* XER (with SO, OV, CA split out) */
-target_ulong xer;
+target_ulong xer;  /* XER (with SO, OV, CA split out) */
 target_ulong so;
 target_ulong ov;
 target_ulong ca;
 target_ulong ov32;
 target_ulong ca32;
-/* Reservation address */
-target_ulong reserve_addr;
-/* Reservation value */
-target_ulong reserve_val;
-target_ulong reserve_val2;
 
-/* Those ones are used in supervisor mode only */
-/* machine state register */
-target_ulong msr;
-/* temporary general purpose registers */
-target_ulong tgpr[4]; /* Used to speed-up TLB assist handlers */
+target_ulong reserve_addr; /* Reservation address */
+target_ulong reserve_val;  /* Reservation value */
+target_ulong reserve_val2;
 
-/* Next instruction pointer */
-target_ulong nip;
+/* These are used in supervisor mode only */
+target_ulong msr;  /* machine state register */
+target_ulong tgpr[4];  /* temporary general purpose registers, */
+   /* used to speed-up TLB assist handlers */
 
-/* High part of 128-bit helper return.  */
-uint64_t retxh;
+target_ulong nip;  /* next instruction pointer */
+uint64_t retxh;/* high part of 128-bit helper return */
 
 /* when a memory exception occurs, the access type is stored here */
 int access_type;
 
-/* MMU context - only relevant for full system emulation */
 #if !defined(CONFIG_USER_ONLY)
+/* MMU context, only relevant for full system emulation */
 #if defined(TARGET_PPC64)
-/* PowerPC 64 SLB area */
-ppc_slb_t slb[MAX_SLB_ENTRIES];
-/* tcg TLB needs flush (deferred slb inval instruction typically) */
+ppc_slb_t slb[MAX_SLB_ENTRIES]; /* PowerPC 64 SLB area */
 #endif
-/* segment registers */
-target_ulong sr[32];
-/* BATs */
-uint32_t nb_BATs;
+target_ulong sr[32];   /* segment registers */
+uint32_t nb_BATs;  /* number of BATs */
 target_ulong DBAT[2][8];
 target_ulong IBAT[2][8];
 /* PowerPC TLB registers (for 4xx, e500 and 60x software driven TLBs) */
-int32_t nb_tlb;  /* Total number of TLB  */
+int32_t nb_tlb;  /* Total number of TLB */
 int tlb_per_way; /* Speed-up helper: used to avoid divisions at run time */
-int nb_ways; /* Number of ways in the TLB set*/
-int last_way;/* Last used way used to allocate TLB in a LRU way  */
+int nb_ways; /* Number of ways in the TLB set */
+int last_way;/* Last used way used to allocate TLB in a LRU way */
 int id_tlbs; /* If 1, MMU has separated TLBs for instructions & data */
-int nb_pids; /* Number of available PID registers*/
-int tlb_type;/* Type of TLB we're dealing with   */
-ppc_tlb_t tlb;   /* TLB is optional. Allocate them only if needed*/
-/* 403 dedicated access protection registers */
-target_ulong pb[4];
-bool tlb_dirty;   /* Set to non-zero when modifying TLB  */
-bool kvm_sw_tlb;  /* non-zero if KVM SW TLB API is active*/
+int nb_pids; /* Number of available PID registers */
+int tlb_type;/* Type of TLB we're dealing with */
+ppc_tlb_t tlb;   /* TLB is optional. Allocate them only if needed */
+target_ulong pb[4]; /* 403 dedicated access protection registers */
+bool tlb_dirty;  /* Set to non-zero when modifying TLB */

[PULL 13/20] spapr: Allow changing offset for -kernel image

2020-02-20 Thread David Gibson
From: Alexey Kardashevskiy 

This allows moving the kernel in the guest memory. The option is useful
for step debugging (as Linux is linked at 0x0); it also allows loading
grub which is normally linked to run at 0x2.

This uses the existing kernel address by default.

Signed-off-by: Alexey Kardashevskiy 
Message-Id: <20200203032943.121178-6-...@ozlabs.ru>
Reviewed-by: Fabiano Rosas 
Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c | 38 +++---
 include/hw/ppc/spapr.h |  1 +
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index cb220fde45..828e2cc135 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1064,7 +1064,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, 
void *fdt)
 }
 
 if (spapr->kernel_size) {
-uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
+uint64_t kprop[2] = { cpu_to_be64(spapr->kernel_addr),
   cpu_to_be64(spapr->kernel_size) };
 
 _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel",
@@ -1252,7 +1252,8 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool 
reset, size_t space)
 /* Build memory reserve map */
 if (reset) {
 if (spapr->kernel_size) {
-_FDT((fdt_add_mem_rsv(fdt, KERNEL_LOAD_ADDR, spapr->kernel_size)));
+_FDT((fdt_add_mem_rsv(fdt, spapr->kernel_addr,
+  spapr->kernel_size)));
 }
 if (spapr->initrd_size) {
 _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base,
@@ -1285,7 +1286,9 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool 
reset, size_t space)
 
 static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
 {
-return (addr & 0x0fff) + KERNEL_LOAD_ADDR;
+SpaprMachineState *spapr = opaque;
+
+return (addr & 0x0fff) + spapr->kernel_addr;
 }
 
 static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp,
@@ -2967,14 +2970,15 @@ static void spapr_machine_init(MachineState *machine)
 uint64_t lowaddr = 0;
 
 spapr->kernel_size = load_elf(kernel_filename, NULL,
-  translate_kernel_address, NULL,
+  translate_kernel_address, spapr,
   NULL, , NULL, NULL, 1,
   PPC_ELF_MACHINE, 0, 0);
 if (spapr->kernel_size == ELF_LOAD_WRONG_ENDIAN) {
 spapr->kernel_size = load_elf(kernel_filename, NULL,
-  translate_kernel_address, NULL, NULL,
+  translate_kernel_address, spapr, 
NULL,
   , NULL, NULL, 0,
-  PPC_ELF_MACHINE, 0, 0);
+  PPC_ELF_MACHINE,
+  0, 0);
 spapr->kernel_le = spapr->kernel_size > 0;
 }
 if (spapr->kernel_size < 0) {
@@ -2988,7 +2992,7 @@ static void spapr_machine_init(MachineState *machine)
 /* Try to locate the initrd in the gap between the kernel
  * and the firmware. Add a bit of space just in case
  */
-spapr->initrd_base = (KERNEL_LOAD_ADDR + spapr->kernel_size
+spapr->initrd_base = (spapr->kernel_addr + spapr->kernel_size
   + 0x1) & ~0x;
 spapr->initrd_size = load_image_targphys(initrd_filename,
  spapr->initrd_base,
@@ -3234,6 +3238,18 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, 
const char *name,
 visit_type_uint32(v, name, (uint32_t *)opaque, errp);
 }
 
+static void spapr_get_kernel_addr(Object *obj, Visitor *v, const char *name,
+  void *opaque, Error **errp)
+{
+visit_type_uint64(v, name, (uint64_t *)opaque, errp);
+}
+
+static void spapr_set_kernel_addr(Object *obj, Visitor *v, const char *name,
+  void *opaque, Error **errp)
+{
+visit_type_uint64(v, name, (uint64_t *)opaque, errp);
+}
+
 static char *spapr_get_ic_mode(Object *obj, Error **errp)
 {
 SpaprMachineState *spapr = SPAPR_MACHINE(obj);
@@ -3339,6 +3355,14 @@ static void spapr_instance_init(Object *obj)
 object_property_add_bool(obj, "vfio-no-msix-emulation",
  spapr_get_msix_emulation, NULL, NULL);
 
+object_property_add(obj, "kernel-addr", "uint64", spapr_get_kernel_addr,
+spapr_set_kernel_addr, NULL, >kernel_addr,
+_abort);
+object_property_set_description(obj, "kernel-addr",
+stringify(KERNEL_LOAD_ADDR)
+" for -kernel is the default",
+NULL);
+spapr->kernel_addr = KERNEL_LOAD_ADDR;
 /* The machine class defines the 

[PULL 12/20] pnv/phb3: Add missing break statement

2020-02-20 Thread David Gibson
From: Greg Kurz 

We obviously don't want to print out an error message if addr points to
a valid register.

Reported-by: Coverity CID 1419391 Missing break in switch
Fixes: 9ae1329ee2fe "ppc/pnv: Add models for POWER8 PHB3 PCIe Host bridge"
Signed-off-by: Greg Kurz 
Message-Id: <158153365202.3229002.11521084761048102466.st...@bahia.lan>
Signed-off-by: David Gibson 
---
 hw/pci-host/pnv_phb3_pbcq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/pci-host/pnv_phb3_pbcq.c b/hw/pci-host/pnv_phb3_pbcq.c
index f232228b0e..7b9a121246 100644
--- a/hw/pci-host/pnv_phb3_pbcq.c
+++ b/hw/pci-host/pnv_phb3_pbcq.c
@@ -173,6 +173,7 @@ static void pnv_pbcq_pci_xscom_write(void *opaque, hwaddr 
addr,
 case PBCQ_PCI_BAR2:
 pbcq->pci_regs[reg] = val & 0xfc00ull;
 pnv_pbcq_update_map(pbcq);
+break;
 default:
 phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__,
 addr, val);
-- 
2.24.1




[PULL 11/20] pnv/phb4: Fix error path in pnv_pec_realize()

2020-02-20 Thread David Gibson
From: Greg Kurz 

Obviously, we want to pass &local_err so that we can check it on the
line below, not errp.

Reported-by: Coverity CID 1419395 'Constant' variable guards dead code
Fixes: 4f9924c4d4cf "ppc/pnv: Add models for POWER9 PHB4 PCIe Host bridge"
Signed-off-by: Greg Kurz 
Message-Id: <158153364605.3229002.2796177658957390343.st...@bahia.lan>
Signed-off-by: David Gibson 
---
 hw/pci-host/pnv_phb4_pec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c
index 68e1db3eac..911d147ffd 100644
--- a/hw/pci-host/pnv_phb4_pec.c
+++ b/hw/pci-host/pnv_phb4_pec.c
@@ -391,7 +391,7 @@ static void pnv_pec_realize(DeviceState *dev, Error **errp)
 
 object_property_set_int(stk_obj, i, "stack-no", _abort);
 object_property_set_link(stk_obj, OBJECT(pec), "pec", _abort);
-object_property_set_bool(stk_obj, true, "realized", errp);
+object_property_set_bool(stk_obj, true, "realized", _err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
-- 
2.24.1




[PULL 00/20] ppc-for-5.0 queue 20200221

2020-02-20 Thread David Gibson
The following changes since commit 7afee874f1b27abc998b8b747d16b77cb6398716:

  Merge remote-tracking branch 
'remotes/vivier2/tags/trivial-branch-pull-request' into staging (2020-02-20 
16:51:19 +)

are available in the Git repository at:

  git://github.com/dgibson/qemu.git tags/ppc-for-5.0-20200221

for you to fetch changes up to 438bafcac55308eef4f9029c94dbadd2c7ac3bb7:

  hw/ppc/virtex_ml507:fix leak of fdevice tree blob (2020-02-21 09:15:04 +1100)


ppc patch queue 2020-02-21

Here's the next batch of ppc target patches.  Highlights are:
  * Some fixes for CAS / unplug interactions
  * Remove some leaks of device trees
  * Some fixes for the PHB3 and PHB4 devices
  * Support for NVDIMMs on the pseries machine type
  * Assorted other fixes and cleanups


Alexey Kardashevskiy (2):
  spapr/rtas: Print message from "ibm,os-term"
  spapr: Allow changing offset for -kernel image

BALATON Zoltan (4):
  target/ppc/cpu.h: Remove duplicate includes
  target/ppc: Fix typo in comments
  target/ppc/cpu.h: Move fpu related members closer in cpu env
  target/ppc/cpu.h: Clean up comments in the struct CPUPPCState definition

Chen Qun (1):
  hw/ppc/virtex_ml507:fix leak of fdevice tree blob

Greg Kurz (5):
  pnv/phb3: Convert 1u to 1ull
  pnv/phb4: Fix error path in pnv_pec_realize()
  pnv/phb3: Add missing break statement
  spapr: Don't use spapr_drc_needed() in CAS code
  spapr: Fix handling of unplugged devices during CAS and migration

Laurent Vivier (2):
  qtest: Fix rtas dependencies
  ppc/pnv: Fix PCI_EXPRESS dependency

Michael S. Tsirkin (1):
  ppc: function to setup latest class options

Pan Nengyuan (1):
  ppc: free 'fdt' after reset the machine

Shivaprasad G Bhat (4):
  mem: move nvdimm_device_list to utilities
  nvdimm: add uuid property to nvdimm
  spapr: Add NVDIMM device support
  spapr: Add Hcalls to support PAPR NVDIMM device

 default-configs/ppc64-softmmu.mak  |   1 +
 hw/acpi/nvdimm.c   |  28 +--
 hw/mem/Kconfig |   2 +-
 hw/mem/nvdimm.c|  40 
 hw/pci-host/pnv_phb3_msi.c |   2 +-
 hw/pci-host/pnv_phb3_pbcq.c|   1 +
 hw/pci-host/pnv_phb4_pec.c |   2 +-
 hw/ppc/Kconfig |   4 +-
 hw/ppc/Makefile.objs   |   2 +-
 hw/ppc/e500.c  |   1 +
 hw/ppc/pnv.c   |   2 +
 hw/ppc/spapr.c | 116 +++--
 hw/ppc/spapr_drc.c |  62 -
 hw/ppc/spapr_events.c  |   4 +
 hw/ppc/spapr_hcall.c   |  14 +-
 hw/ppc/spapr_nvdimm.c  | 475 +
 hw/ppc/spapr_rtas.c|   7 +
 hw/ppc/virtex_ml507.c  |   1 +
 include/hw/mem/nvdimm.h|   7 +
 include/hw/ppc/spapr.h |   9 +-
 include/hw/ppc/spapr_drc.h |  13 +-
 include/hw/ppc/spapr_nvdimm.h  |  37 +++
 include/qemu/nvdimm-utils.h|   7 +
 qtest.c|   5 +-
 target/ppc/cpu.h   | 148 +---
 target/ppc/fpu_helper.c|   4 +-
 target/ppc/translate/fp-impl.inc.c |   6 +-
 util/Makefile.objs |   1 +
 util/nvdimm-utils.c|  29 +++
 29 files changed, 864 insertions(+), 166 deletions(-)
 create mode 100644 hw/ppc/spapr_nvdimm.c
 create mode 100644 include/hw/ppc/spapr_nvdimm.h
 create mode 100644 include/qemu/nvdimm-utils.h
 create mode 100644 util/nvdimm-utils.c



[PULL 14/20] target/ppc: Fix typo in comments

2020-02-20 Thread David Gibson
From: BALATON Zoltan 

"Deferred" was misspelled as "differed" in some comments; correct this
typo.

Signed-off-by: BALATON Zoltan 
Message-Id: <20200214155748.0896b745...@zero.eik.bme.hu>
Signed-off-by: David Gibson 
---
 target/ppc/fpu_helper.c| 4 ++--
 target/ppc/translate/fp-impl.inc.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index dc383242f7..ae43b08eb5 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -293,7 +293,7 @@ static void float_invalid_op_vxvc(CPUPPCState *env, bool 
set_fpcc,
 env->error_code = POWERPC_EXCP_FP | POWERPC_EXCP_FP_VXVC;
 /* Update the floating-point enabled exception summary */
 env->fpscr |= FP_FEX;
-/* Exception is differed */
+/* Exception is deferred */
 }
 }
 
@@ -644,7 +644,7 @@ static void do_float_check_status(CPUPPCState *env, 
uintptr_t raddr)
 
 if (cs->exception_index == POWERPC_EXCP_PROGRAM &&
 (env->error_code & POWERPC_EXCP_FP)) {
-/* Differred floating-point exception after target FPR update */
+/* Deferred floating-point exception after target FPR update */
 if (fp_exceptions_enabled(env)) {
 raise_exception_err_ra(env, cs->exception_index,
env->error_code, raddr);
diff --git a/target/ppc/translate/fp-impl.inc.c 
b/target/ppc/translate/fp-impl.inc.c
index d8e27bf4d5..9f7868ee28 100644
--- a/target/ppc/translate/fp-impl.inc.c
+++ b/target/ppc/translate/fp-impl.inc.c
@@ -781,7 +781,7 @@ static void gen_mtfsb1(DisasContext *ctx)
 tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
 tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
 }
-/* We can raise a differed exception */
+/* We can raise a deferred exception */
 gen_helper_float_check_status(cpu_env);
 }
 
@@ -817,7 +817,7 @@ static void gen_mtfsf(DisasContext *ctx)
 tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
 tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
 }
-/* We can raise a differed exception */
+/* We can raise a deferred exception */
 gen_helper_float_check_status(cpu_env);
 tcg_temp_free_i64(t1);
 }
@@ -850,7 +850,7 @@ static void gen_mtfsfi(DisasContext *ctx)
 tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
 tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
 }
-/* We can raise a differed exception */
+/* We can raise a deferred exception */
 gen_helper_float_check_status(cpu_env);
 }
 
-- 
2.24.1




[PULL 09/20] target/ppc/cpu.h: Remove duplicate includes

2020-02-20 Thread David Gibson
From: BALATON Zoltan 

Commit 74433bf083b added some includes but added them twice. Since
these are guarded against multiple inclusion, including them once is
enough.

Signed-off-by: BALATON Zoltan 
Message-Id: <20200212223207.5a375746...@zero.eik.bme.hu>
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: David Gibson 
---
 target/ppc/cpu.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 3a1eb76004..07dd2b4da7 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -23,8 +23,6 @@
 #include "qemu/int128.h"
 #include "exec/cpu-defs.h"
 #include "cpu-qom.h"
-#include "exec/cpu-defs.h"
-#include "cpu-qom.h"
 
 /* #define PPC_EMULATE_32BITS_HYPV */
 
-- 
2.24.1




[PULL 06/20] nvdimm: add uuid property to nvdimm

2020-02-20 Thread David Gibson
From: Shivaprasad G Bhat 

For ppc64, PAPR requires the nvdimm device to have UUID property
set in the device tree. Add an option to get it from the user.

Signed-off-by: Shivaprasad G Bhat 
Reviewed-by: David Gibson 
Reviewed-by: Igor Mammedov 
Message-Id: 
<158131056931.2897.14057087440721445976.st...@lep8c.aus.stglabs.ibm.com>
Signed-off-by: David Gibson 
---
 hw/mem/nvdimm.c | 40 
 include/hw/mem/nvdimm.h |  7 +++
 2 files changed, 47 insertions(+)

diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 39f1426d1f..8e426d24bb 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -69,11 +69,51 @@ out:
 error_propagate(errp, local_err);
 }
 
+static void nvdimm_get_uuid(Object *obj, Visitor *v, const char *name,
+  void *opaque, Error **errp)
+{
+NVDIMMDevice *nvdimm = NVDIMM(obj);
+char *value = NULL;
+
+value = qemu_uuid_unparse_strdup(>uuid);
+
+visit_type_str(v, name, , errp);
+g_free(value);
+}
+
+
+static void nvdimm_set_uuid(Object *obj, Visitor *v, const char *name,
+  void *opaque, Error **errp)
+{
+NVDIMMDevice *nvdimm = NVDIMM(obj);
+Error *local_err = NULL;
+char *value;
+
+visit_type_str(v, name, , _err);
+if (local_err) {
+goto out;
+}
+
+if (qemu_uuid_parse(value, >uuid) != 0) {
+error_setg(errp, "Property '%s.%s' has invalid value",
+   object_get_typename(obj), name);
+goto out;
+}
+g_free(value);
+
+out:
+error_propagate(errp, local_err);
+}
+
+
 static void nvdimm_init(Object *obj)
 {
 object_property_add(obj, NVDIMM_LABEL_SIZE_PROP, "int",
 nvdimm_get_label_size, nvdimm_set_label_size, NULL,
 NULL, NULL);
+
+object_property_add(obj, NVDIMM_UUID_PROP, "QemuUUID", nvdimm_get_uuid,
+nvdimm_set_uuid, NULL, NULL, NULL);
 }
 
 static void nvdimm_finalize(Object *obj)
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index 523a9b3d4a..4807ca615b 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -25,6 +25,7 @@
 
 #include "hw/mem/pc-dimm.h"
 #include "hw/acpi/bios-linker-loader.h"
+#include "qemu/uuid.h"
 
 #define NVDIMM_DEBUG 0
 #define nvdimm_debug(fmt, ...)\
@@ -49,6 +50,7 @@
TYPE_NVDIMM)
 
 #define NVDIMM_LABEL_SIZE_PROP "label-size"
+#define NVDIMM_UUID_PROP   "uuid"
 #define NVDIMM_UNARMED_PROP"unarmed"
 
 struct NVDIMMDevice {
@@ -83,6 +85,11 @@ struct NVDIMMDevice {
  * the guest write persistence.
  */
 bool unarmed;
+
+/*
+ * The PPC64 - spapr requires each nvdimm device have a uuid.
+ */
+QemuUUID uuid;
 };
 typedef struct NVDIMMDevice NVDIMMDevice;
 
-- 
2.24.1




[PULL 07/20] spapr: Add NVDIMM device support

2020-02-20 Thread David Gibson
From: Shivaprasad G Bhat 

Add support for NVDIMM devices for sPAPR. Piggyback on existing nvdimm
device interface in QEMU to support virtual NVDIMM devices for Power.
Create the required DT entries for the device (some entries have
dummy values right now).

The patch creates the required DT node and sends a hotplug
interrupt to the guest. Guest is expected to undertake the normal
DR resource add path in response and start issuing PAPR SCM hcalls.

The device support is verified based on the machine version unlike x86.

This is how it can be used ..
Ex :
For coldplug, the device is to be added on the qemu command line as shown below
-object 
memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm0,share=yes,size=1073872896
-device 
nvdimm,label-size=128k,uuid=75a3cdd7-6a2f-4791-8d15-fe0a920e8e9e,memdev=memnvdimm0,id=nvdimm0,slot=0

For hotplug, the device is to be added from the monitor as shown below
object_add 
memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm0,share=yes,size=1073872896
device_add 
nvdimm,label-size=128k,uuid=75a3cdd7-6a2f-4791-8d15-fe0a920e8e9e,memdev=memnvdimm0,id=nvdimm0,slot=0

Signed-off-by: Shivaprasad G Bhat 
Signed-off-by: Bharata B Rao 
   [Early implementation]
Message-Id: 
<158131058078.2897.12767731856697459923.st...@lep8c.aus.stglabs.ibm.com>
Signed-off-by: David Gibson 
---
 default-configs/ppc64-softmmu.mak |   1 +
 hw/mem/Kconfig|   2 +-
 hw/ppc/Makefile.objs  |   2 +-
 hw/ppc/spapr.c|  69 ++--
 hw/ppc/spapr_drc.c|  19 
 hw/ppc/spapr_events.c |   4 +
 hw/ppc/spapr_nvdimm.c | 177 ++
 include/hw/ppc/spapr_drc.h|   9 ++
 include/hw/ppc/spapr_nvdimm.h |  37 +++
 9 files changed, 309 insertions(+), 11 deletions(-)
 create mode 100644 hw/ppc/spapr_nvdimm.c
 create mode 100644 include/hw/ppc/spapr_nvdimm.h

diff --git a/default-configs/ppc64-softmmu.mak 
b/default-configs/ppc64-softmmu.mak
index cca52665d9..ae0841fa3a 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -8,3 +8,4 @@ CONFIG_POWERNV=y
 
 # For pSeries
 CONFIG_PSERIES=y
+CONFIG_NVDIMM=y
diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig
index 620fd4cb59..2ad052a536 100644
--- a/hw/mem/Kconfig
+++ b/hw/mem/Kconfig
@@ -8,4 +8,4 @@ config MEM_DEVICE
 config NVDIMM
 bool
 default y
-depends on PC
+depends on (PC || PSERIES)
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index a4bac57be6..c3d3cc56eb 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -7,7 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o 
spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o
 obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o spapr_irq.o
-obj-$(CONFIG_PSERIES) += spapr_tpm_proxy.o
+obj-$(CONFIG_PSERIES) += spapr_tpm_proxy.o spapr_nvdimm.o
 obj-$(CONFIG_SPAPR_RNG) +=  spapr_rng.o
 obj-$(call land,$(CONFIG_PSERIES),$(CONFIG_LINUX)) += spapr_pci_vfio.o 
spapr_pci_nvlink2.o
 # IBM PowerNV
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 691c391060..cb220fde45 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -80,6 +80,7 @@
 #include "hw/ppc/spapr_cpu_core.h"
 #include "hw/mem/memory-device.h"
 #include "hw/ppc/spapr_tpm_proxy.h"
+#include "hw/ppc/spapr_nvdimm.h"
 
 #include "monitor/monitor.h"
 
@@ -675,6 +676,14 @@ static int spapr_populate_drmem_v2(SpaprMachineState 
*spapr, void *fdt,
 size = di->size;
 node = di->node;
 
+/*
+ * The NVDIMM area is hotpluggable after the NVDIMM is unplugged. The
+ * area is marked hotpluggable in the next iteration for the bigger
+ * chunk including the NVDIMM occupied area.
+ */
+if (info->value->type == MEMORY_DEVICE_INFO_KIND_NVDIMM)
+continue;
+
 /* Entry for hot-pluggable area */
 if (cur_addr < addr) {
 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, cur_addr / lmb_size);
@@ -1266,6 +1275,11 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool 
reset, size_t space)
 }
 }
 
+/* NVDIMM devices */
+if (mc->nvdimm_supported) {
+spapr_dt_persistent_memory(fdt);
+}
+
 return fdt;
 }
 
@@ -2629,6 +2643,7 @@ static void spapr_machine_init(MachineState *machine)
 {
 SpaprMachineState *spapr = SPAPR_MACHINE(machine);
 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
+MachineClass *mc = MACHINE_GET_CLASS(machine);
 const char *kernel_filename = machine->kernel_filename;
 const char *initrd_filename = machine->initrd_filename;
 PCIHostState *phb;
@@ -2861,6 +2876,10 @@ static void spapr_machine_init(MachineState *machine)
 "may run and log hardware error on the destination");
 }
 
+if (mc->nvdimm_supported) {
+spapr_create_nvdimm_dr_connectors(spapr);
+}
+
 /* 

[PULL 03/20] ppc/pnv: Fix PCI_EXPRESS dependency

2020-02-20 Thread David Gibson
From: Laurent Vivier 

When the PHB4 bridge was added, the dependency on PCIE_PORT was added
to XIVE_SPAPR and thus indirectly to PSERIES.
The PowerNV machine builds fine as long as we also build the PSERIES
machine.
If we disable the PSERIES machine, the PowerNV build fails because the
PCI Express files are not built:

/usr/bin/ld: hw/ppc/pnv.o: in function `pnv_chip_power8_pic_print_info':
.../hw/ppc/pnv.c:623: undefined reference to `pnv_phb3_msi_pic_print_info'
/usr/bin/ld: hw/ppc/pnv.o: in function `pnv_chip_power9_pic_print_info':
.../hw/ppc/pnv.c:639: undefined reference to `pnv_phb4_pic_print_info'
/usr/bin/ld: ../hw/usb/hcd-ehci-pci.o: in function `usb_ehci_pci_write_config':
.../hw/usb/hcd-ehci-pci.c:129: undefined reference to `pci_default_write_config'
/usr/bin/ld: ../hw/usb/hcd-ehci-pci.o: in function `usb_ehci_pci_realize':
.../hw/usb/hcd-ehci-pci.c:68: undefined reference to `pci_allocate_irq'
/usr/bin/ld: .../hw/usb/hcd-ehci-pci.c:72: undefined reference to 
`pci_register_bar'
/usr/bin/ld: ../hw/usb/hcd-ehci-pci.o:(.data.rel+0x50): undefined reference to 
`vmstate_pci_device'

This patch fixes the problem by adding needed dependencies to POWERNV.

Fixes: 4f9924c4d4cf ("ppc/pnv: Add models for POWER9 PHB4 PCIe Host bridge")
Signed-off-by: Laurent Vivier 
Message-Id: <20200205232016.588202-3-lviv...@redhat.com>
Signed-off-by: David Gibson 
---
 hw/ppc/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index 354828bf13..dd86e664d2 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -29,6 +29,8 @@ config POWERNV
 select XICS
 select XIVE
 select FDT_PPC
+select PCI_EXPRESS
+select MSI_NONBROKEN
 
 config PPC405
 bool
@@ -135,8 +137,6 @@ config XIVE_SPAPR
 default y
 depends on PSERIES
 select XIVE
-select PCI
-select PCIE_PORT
 
 config XIVE_KVM
 bool
-- 
2.24.1




[PULL 08/20] spapr: Add Hcalls to support PAPR NVDIMM device

2020-02-20 Thread David Gibson
From: Shivaprasad G Bhat 

This patch implements a few of the necessary hcalls for the nvdimm support.

PAPR semantics are such that each NVDIMM device comprises multiple
SCM (Storage Class Memory) blocks. The guest requests the hypervisor to
bind each of the SCM blocks of the NVDIMM device using hcalls. There can
be SCM block unbind requests in case of driver errors or unplug (not
supported now) use cases. The NVDIMM label reads/writes are done through
hcalls.

Since each virtual NVDIMM device is divided into multiple SCM blocks,
the bind, unbind, and queries using hcalls on those blocks can come
independently. This doesn't fit well into the qemu device semantics,
where the map/unmap are done at the (whole)device/object level granularity.
The patch doesn't actually bind/unbind on hcalls but lets it happen at the
device_add/del phase itself instead.

The guest kernel makes bind/unbind requests for the virtual NVDIMM device
at the region level granularity. Without interleaving, each virtual NVDIMM
device is presented as a separate guest physical address range. So, there
is no way a partial bind/unbind request can come for the vNVDIMM in a
hcall for a subset of SCM blocks of a virtual NVDIMM. Hence it is safe to
do bind/unbind everything during the device_add/del.

Signed-off-by: Shivaprasad G Bhat 
Message-Id: 
<158131059899.2897.11515211602702956854.st...@lep8c.aus.stglabs.ibm.com>
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_nvdimm.c  | 298 +
 include/hw/ppc/spapr.h |   8 +-
 2 files changed, 305 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
index d03c8d3a5c..74eeb8bb74 100644
--- a/hw/ppc/spapr_nvdimm.c
+++ b/hw/ppc/spapr_nvdimm.c
@@ -28,6 +28,7 @@
 #include "hw/mem/nvdimm.h"
 #include "qemu/nvdimm-utils.h"
 #include "hw/ppc/fdt.h"
+#include "qemu/range.h"
 
 void spapr_nvdimm_validate_opts(NVDIMMDevice *nvdimm, uint64_t size,
 Error **errp)
@@ -175,3 +176,300 @@ void spapr_dt_persistent_memory(void *fdt)
 
 return;
 }
+
+static target_ulong h_scm_read_metadata(PowerPCCPU *cpu,
+SpaprMachineState *spapr,
+target_ulong opcode,
+target_ulong *args)
+{
+uint32_t drc_index = args[0];
+uint64_t offset = args[1];
+uint64_t len = args[2];
+SpaprDrc *drc = spapr_drc_by_index(drc_index);
+NVDIMMDevice *nvdimm;
+NVDIMMClass *ddc;
+uint64_t data = 0;
+uint8_t buf[8] = { 0 };
+
+if (!drc || !drc->dev ||
+spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
+return H_PARAMETER;
+}
+
+if (len != 1 && len != 2 &&
+len != 4 && len != 8) {
+return H_P3;
+}
+
+nvdimm = NVDIMM(drc->dev);
+if ((offset + len < offset) ||
+(nvdimm->label_size < len + offset)) {
+return H_P2;
+}
+
+ddc = NVDIMM_GET_CLASS(nvdimm);
+ddc->read_label_data(nvdimm, buf, len, offset);
+
+switch (len) {
+case 1:
+data = ldub_p(buf);
+break;
+case 2:
+data = lduw_be_p(buf);
+break;
+case 4:
+data = ldl_be_p(buf);
+break;
+case 8:
+data = ldq_be_p(buf);
+break;
+default:
+g_assert_not_reached();
+}
+
+args[0] = data;
+
+return H_SUCCESS;
+}
+
+static target_ulong h_scm_write_metadata(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+uint32_t drc_index = args[0];
+uint64_t offset = args[1];
+uint64_t data = args[2];
+uint64_t len = args[3];
+SpaprDrc *drc = spapr_drc_by_index(drc_index);
+NVDIMMDevice *nvdimm;
+NVDIMMClass *ddc;
+uint8_t buf[8] = { 0 };
+
+if (!drc || !drc->dev ||
+spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
+return H_PARAMETER;
+}
+
+if (len != 1 && len != 2 &&
+len != 4 && len != 8) {
+return H_P4;
+}
+
+nvdimm = NVDIMM(drc->dev);
+if ((offset + len < offset) ||
+(nvdimm->label_size < len + offset)) {
+return H_P2;
+}
+
+switch (len) {
+case 1:
+if (data & 0xff00) {
+return H_P2;
+}
+stb_p(buf, data);
+break;
+case 2:
+if (data & 0x) {
+return H_P2;
+}
+stw_be_p(buf, data);
+break;
+case 4:
+if (data & 0x) {
+return H_P2;
+}
+stl_be_p(buf, data);
+break;
+case 8:
+stq_be_p(buf, data);
+break;
+default:
+g_assert_not_reached();
+}
+
+ddc = NVDIMM_GET_CLASS(nvdimm);
+ddc->write_label_data(nvdimm, buf, len, offset);
+
+return H_SUCCESS;
+}
+
+static target_ulong 

[PULL 05/20] mem: move nvdimm_device_list to utilities

2020-02-20 Thread David Gibson
From: Shivaprasad G Bhat 

nvdimm_device_list is required for parsing the list for devices
in subsequent patches. Move it to common utility area.

Signed-off-by: Shivaprasad G Bhat 
Reviewed-by: Igor Mammedov 
Reviewed-by: David Gibson 
Message-Id: 
<158131055857.2897.15658377276504711773.st...@lep8c.aus.stglabs.ibm.com>
Signed-off-by: David Gibson 
---
 hw/acpi/nvdimm.c| 28 +---
 include/qemu/nvdimm-utils.h |  7 +++
 util/Makefile.objs  |  1 +
 util/nvdimm-utils.c | 29 +
 4 files changed, 38 insertions(+), 27 deletions(-)
 create mode 100644 include/qemu/nvdimm-utils.h
 create mode 100644 util/nvdimm-utils.c

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 9fdad6dc3f..5219dd0e2e 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -32,33 +32,7 @@
 #include "hw/acpi/bios-linker-loader.h"
 #include "hw/nvram/fw_cfg.h"
 #include "hw/mem/nvdimm.h"
-
-static int nvdimm_device_list(Object *obj, void *opaque)
-{
-GSList **list = opaque;
-
-if (object_dynamic_cast(obj, TYPE_NVDIMM)) {
-*list = g_slist_append(*list, DEVICE(obj));
-}
-
-object_child_foreach(obj, nvdimm_device_list, opaque);
-return 0;
-}
-
-/*
- * inquire NVDIMM devices and link them into the list which is
- * returned to the caller.
- *
- * Note: it is the caller's responsibility to free the list to avoid
- * memory leak.
- */
-static GSList *nvdimm_get_device_list(void)
-{
-GSList *list = NULL;
-
-object_child_foreach(qdev_get_machine(), nvdimm_device_list, );
-return list;
-}
+#include "qemu/nvdimm-utils.h"
 
 #define NVDIMM_UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
diff --git a/include/qemu/nvdimm-utils.h b/include/qemu/nvdimm-utils.h
new file mode 100644
index 00..4b8b198ba7
--- /dev/null
+++ b/include/qemu/nvdimm-utils.h
@@ -0,0 +1,7 @@
+#ifndef NVDIMM_UTILS_H
+#define NVDIMM_UTILS_H
+
+#include "qemu/osdep.h"
+
+GSList *nvdimm_get_device_list(void);
+#endif
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 11262aafaf..6b38b67cf1 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -20,6 +20,7 @@ util-obj-y += envlist.o path.o module.o
 util-obj-y += host-utils.o
 util-obj-y += bitmap.o bitops.o hbitmap.o
 util-obj-y += fifo8.o
+util-obj-y += nvdimm-utils.o
 util-obj-y += cacheinfo.o
 util-obj-y += error.o qemu-error.o
 util-obj-y += qemu-print.o
diff --git a/util/nvdimm-utils.c b/util/nvdimm-utils.c
new file mode 100644
index 00..5cc768ca47
--- /dev/null
+++ b/util/nvdimm-utils.c
@@ -0,0 +1,29 @@
+#include "qemu/nvdimm-utils.h"
+#include "hw/mem/nvdimm.h"
+
+static int nvdimm_device_list(Object *obj, void *opaque)
+{
+GSList **list = opaque;
+
+if (object_dynamic_cast(obj, TYPE_NVDIMM)) {
+*list = g_slist_append(*list, DEVICE(obj));
+}
+
+object_child_foreach(obj, nvdimm_device_list, opaque);
+return 0;
+}
+
+/*
+ * inquire NVDIMM devices and link them into the list which is
+ * returned to the caller.
+ *
+ * Note: it is the caller's responsibility to free the list to avoid
+ * memory leak.
+ */
+GSList *nvdimm_get_device_list(void)
+{
+GSList *list = NULL;
+
+object_child_foreach(qdev_get_machine(), nvdimm_device_list, );
+return list;
+}
-- 
2.24.1




[PULL 04/20] ppc: function to setup latest class options

2020-02-20 Thread David Gibson
From: "Michael S. Tsirkin" 

We are going to add more init for the latest machine, so move the setup
to a function so we don't have to change the DEFINE_SPAPR_MACHINE macro
each time.

Signed-off-by: Michael S. Tsirkin 
Message-Id: <20200207064628.1196095-1-...@redhat.com>
Reviewed-by: Laurent Vivier 
Reviewed-by: Greg Kurz 
Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index c9b2e0a5e0..691c391060 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4485,6 +4485,12 @@ static const TypeInfo spapr_machine_info = {
 },
 };
 
+static void spapr_machine_latest_class_options(MachineClass *mc)
+{
+mc->alias = "pseries";
+mc->is_default = 1;
+}
+
 #define DEFINE_SPAPR_MACHINE(suffix, verstr, latest) \
 static void spapr_machine_##suffix##_class_init(ObjectClass *oc, \
 void *data)  \
@@ -4492,8 +4498,7 @@ static const TypeInfo spapr_machine_info = {
 MachineClass *mc = MACHINE_CLASS(oc);\
 spapr_machine_##suffix##_class_options(mc);  \
 if (latest) {\
-mc->alias = "pseries";   \
-mc->is_default = 1;  \
+spapr_machine_latest_class_options(mc);  \
 }\
 }\
 static const TypeInfo spapr_machine_##suffix##_info = {  \
-- 
2.24.1




[PULL 01/20] spapr/rtas: Print message from "ibm,os-term"

2020-02-20 Thread David Gibson
From: Alexey Kardashevskiy 

The "ibm,os-term" RTAS call has a single parameter which is a pointer to
a message from the guest kernel about the termination cause; this prints
it.

Signed-off-by: Alexey Kardashevskiy 
Message-Id: <20200203032044.118585-1-...@ozlabs.ru>
Reviewed-by: Daniel Henrique Barboza 
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_rtas.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 883fe28465..656fdd2216 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -345,6 +345,13 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu,
 target_ulong args,
 uint32_t nret, target_ulong rets)
 {
+target_ulong msgaddr = rtas_ld(args, 0);
+char msg[512];
+
+cpu_physical_memory_read(msgaddr, msg, sizeof(msg) - 1);
+msg[sizeof(msg) - 1] = 0;
+
+error_report("OS terminated: %s", msg);
 qemu_system_guest_panicked(NULL);
 
 rtas_st(rets, 0, RTAS_OUT_SUCCESS);
-- 
2.24.1




[PULL 02/20] qtest: Fix rtas dependencies

2020-02-20 Thread David Gibson
From: Laurent Vivier 

The qtest "rtas" command is only available with pseries, not with all ppc64
targets, so if I try to compile only the powernv machine, the build fails with:

  /usr/bin/ld: qtest.o: in function `qtest_process_command':
  .../qtest.c:645: undefined reference to `qtest_rtas_call'

We fix this by enabling rtas command only with pseries machine.

Fixes: eeddd59f5962 ("tests: add RTAS command in the protocol")
Signed-off-by: Laurent Vivier 
Message-Id: <20200205232016.588202-2-lviv...@redhat.com>
Signed-off-by: David Gibson 
---
 qtest.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/qtest.c b/qtest.c
index 12432f99cf..587dcbb4b5 100644
--- a/qtest.c
+++ b/qtest.c
@@ -27,7 +27,8 @@
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qemu/cutils.h"
-#ifdef TARGET_PPC64
+#include "config-devices.h"
+#ifdef CONFIG_PSERIES
 #include "hw/ppc/spapr_rtas.h"
 #endif
 
@@ -628,7 +629,7 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 #else
 qtest_sendf(chr, "OK little\n");
 #endif
-#ifdef TARGET_PPC64
+#ifdef CONFIG_PSERIES
 } else if (strcmp(words[0], "rtas") == 0) {
 uint64_t res, args, ret;
 unsigned long nargs, nret;
-- 
2.24.1




Re: [PATCH v3 19/20] Let cpu_[physical]_memory() calls pass a boolean 'is_write' argument

2020-02-20 Thread David Gibson
On Thu, Feb 20, 2020 at 02:05:47PM +0100, Philippe Mathieu-Daudé wrote:
> Use an explicit boolean type.
> 
> This commit was produced with the included Coccinelle script
> scripts/coccinelle/exec_rw_const.
> 
> Signed-off-by: Philippe Mathieu-Daudé 

ppc parts

Acked-by: David Gibson 

> ---
>  scripts/coccinelle/exec_rw_const.cocci | 14 ++
>  include/exec/cpu-common.h  |  4 ++--
>  hw/display/exynos4210_fimd.c   |  3 ++-
>  hw/display/milkymist-tmu2.c|  8 
>  hw/display/omap_dss.c  |  2 +-
>  hw/display/ramfb.c |  2 +-
>  hw/misc/pc-testdev.c   |  2 +-
>  hw/nvram/spapr_nvram.c |  4 ++--
>  hw/ppc/ppc440_uc.c |  6 --
>  hw/ppc/spapr_hcall.c   |  4 ++--
>  hw/s390x/ipl.c |  2 +-
>  hw/s390x/s390-pci-bus.c|  2 +-
>  hw/s390x/virtio-ccw.c  |  2 +-
>  hw/xen/xen_pt_graphics.c   |  2 +-
>  target/i386/hax-all.c  |  4 ++--
>  target/s390x/excp_helper.c |  2 +-
>  target/s390x/helper.c  |  6 +++---
>  17 files changed, 43 insertions(+), 26 deletions(-)
> 
> diff --git a/scripts/coccinelle/exec_rw_const.cocci 
> b/scripts/coccinelle/exec_rw_const.cocci
> index ee98ce988e..54b1cab8cd 100644
> --- a/scripts/coccinelle/exec_rw_const.cocci
> +++ b/scripts/coccinelle/exec_rw_const.cocci
> @@ -11,6 +11,20 @@ expression E1, E2, E3, E4, E5;
>  |
>  - address_space_rw(E1, E2, E3, E4, E5, 1)
>  + address_space_rw(E1, E2, E3, E4, E5, true)
> +|
> +
> +- cpu_physical_memory_rw(E1, E2, E3, 0)
> ++ cpu_physical_memory_rw(E1, E2, E3, false)
> +|
> +- cpu_physical_memory_rw(E1, E2, E3, 1)
> ++ cpu_physical_memory_rw(E1, E2, E3, true)
> +|
> +
> +- cpu_physical_memory_map(E1, E2, 0)
> ++ cpu_physical_memory_map(E1, E2, false)
> +|
> +- cpu_physical_memory_map(E1, E2, 1)
> ++ cpu_physical_memory_map(E1, E2, true)
>  )
>  
>  // Use address_space_write instead of casting to non-const
> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
> index 6bfe201779..e7fd5781ea 100644
> --- a/include/exec/cpu-common.h
> +++ b/include/exec/cpu-common.h
> @@ -74,12 +74,12 @@ void cpu_physical_memory_rw(hwaddr addr, void *buf,
>  static inline void cpu_physical_memory_read(hwaddr addr,
>  void *buf, hwaddr len)
>  {
> -cpu_physical_memory_rw(addr, buf, len, 0);
> +cpu_physical_memory_rw(addr, buf, len, false);
>  }
>  static inline void cpu_physical_memory_write(hwaddr addr,
>   const void *buf, hwaddr len)
>  {
> -cpu_physical_memory_rw(addr, (void *)buf, len, 1);
> +cpu_physical_memory_rw(addr, (void *)buf, len, true);
>  }
>  void *cpu_physical_memory_map(hwaddr addr,
>hwaddr *plen,
> diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c
> index c1071ecd46..ec6776680e 100644
> --- a/hw/display/exynos4210_fimd.c
> +++ b/hw/display/exynos4210_fimd.c
> @@ -1164,7 +1164,8 @@ static void 
> fimd_update_memory_section(Exynos4210fimdState *s, unsigned win)
>  goto error_return;
>  }
>  
> -w->host_fb_addr = cpu_physical_memory_map(fb_start_addr, _mapped_len, 
> 0);
> +w->host_fb_addr = cpu_physical_memory_map(fb_start_addr, _mapped_len,
> +  false);
>  if (!w->host_fb_addr) {
>  DPRINT_ERROR("Failed to map window %u framebuffer\n", win);
>  goto error_return;
> diff --git a/hw/display/milkymist-tmu2.c b/hw/display/milkymist-tmu2.c
> index 199f1227e7..513c0d5bab 100644
> --- a/hw/display/milkymist-tmu2.c
> +++ b/hw/display/milkymist-tmu2.c
> @@ -218,7 +218,7 @@ static void tmu2_start(MilkymistTMU2State *s)
>  glGenTextures(1, );
>  glBindTexture(GL_TEXTURE_2D, texture);
>  fb_len = 2ULL * s->regs[R_TEXHRES] * s->regs[R_TEXVRES];
> -fb = cpu_physical_memory_map(s->regs[R_TEXFBUF], _len, 0);
> +fb = cpu_physical_memory_map(s->regs[R_TEXFBUF], _len, false);
>  if (fb == NULL) {
>  glDeleteTextures(1, );
>  glXMakeContextCurrent(s->dpy, None, None, NULL);
> @@ -262,7 +262,7 @@ static void tmu2_start(MilkymistTMU2State *s)
>  
>  /* Read the QEMU dest. framebuffer into the OpenGL framebuffer */
>  fb_len = 2ULL * s->regs[R_DSTHRES] * s->regs[R_DSTVRES];
> -fb = cpu_physical_memory_map(s->regs[R_DSTFBUF], _len, 0);
> +fb = cpu_physical_memory_map(s->regs[R_DSTFBUF], _len, false);
>  if (fb == NULL) {
>  glDeleteTextures(1, );
>  glXMakeContextCurrent(s->dpy, None, None, NULL);
> @@ -281,7 +281,7 @@ static void tmu2_start(MilkymistTMU2State *s)
>  
>  /* Map the texture */
>  mesh_len = MESH_MAXSIZE*MESH_MAXSIZE*sizeof(struct vertex);
> -mesh = cpu_physical_memory_map(s->regs[R_VERTICESADDR], _len, 0);
> +mesh = 

[PATCH] migration/throttle: Add throttle-trig-thres migration parameter

2020-02-20 Thread Keqian Zhu
Currently, if the bytes_dirty_period is more than 50% of
bytes_xfer_period, we start or increase throttling.

If we make this percentage higher, then we can tolerate higher
dirty rate during migration, which means less impact on guest.
The side effect of higher percentage is longer migration time.

We can configure this parameter to switch between migration time
first and guest performance first. The default value is 50.

Signed-off-by: Keqian Zhu 
---
Cc: Juan Quintela 
Cc: "Dr. David Alan Gilbert" 
Cc: Eric Blake 
Cc: Markus Armbruster 
---
 migration/migration.c | 24 
 migration/ram.c   | 21 +++--
 monitor/hmp-cmds.c|  7 +++
 qapi/migration.json   | 16 +++-
 4 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 8fb68795dc..e6c2451734 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -78,6 +78,7 @@
 /*0: means nocompress, 1: best speed, ... 9: best compress ratio */
 #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
 /* Define default autoconverge cpu throttle migration parameters */
+#define DEFAULT_MIGRATE_THROTTLE_TRIG_THRES 50
 #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
 #define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
 #define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99
@@ -778,6 +779,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
 params->compress_wait_thread = s->parameters.compress_wait_thread;
 params->has_decompress_threads = true;
 params->decompress_threads = s->parameters.decompress_threads;
+params->has_throttle_trig_thres = true;
+params->throttle_trig_thres = s->parameters.throttle_trig_thres;
 params->has_cpu_throttle_initial = true;
 params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
 params->has_cpu_throttle_increment = true;
@@ -1164,6 +1167,15 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 return false;
 }
 
+if (params->has_throttle_trig_thres &&
+(params->throttle_trig_thres < 1 ||
+ params->throttle_trig_thres > 99)) {
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+   "throttle_trig_thres",
+   "an integer in the range of 1 to 99");
+return false;
+}
+
 if (params->has_cpu_throttle_initial &&
 (params->cpu_throttle_initial < 1 ||
  params->cpu_throttle_initial > 99)) {
@@ -1279,6 +1291,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 dest->decompress_threads = params->decompress_threads;
 }
 
+if (params->has_throttle_trig_thres) {
+dest->throttle_trig_thres = params->throttle_trig_thres;
+}
+
 if (params->has_cpu_throttle_initial) {
 dest->cpu_throttle_initial = params->cpu_throttle_initial;
 }
@@ -1360,6 +1376,10 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 s->parameters.decompress_threads = params->decompress_threads;
 }
 
+if (params->has_throttle_trig_thres) {
+s->parameters.throttle_trig_thres = params->throttle_trig_thres;
+}
+
 if (params->has_cpu_throttle_initial) {
 s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
 }
@@ -3506,6 +3526,9 @@ static Property migration_properties[] = {
 DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
   parameters.decompress_threads,
   DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
+DEFINE_PROP_UINT8("x-throttle-trig-thres", MigrationState,
+  parameters.throttle_trig_thres,
+  DEFAULT_MIGRATE_THROTTLE_TRIG_THRES),
 DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
   parameters.cpu_throttle_initial,
   DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
@@ -3606,6 +3629,7 @@ static void migration_instance_init(Object *obj)
 params->has_compress_level = true;
 params->has_compress_threads = true;
 params->has_decompress_threads = true;
+params->has_throttle_trig_thres = true;
 params->has_cpu_throttle_initial = true;
 params->has_cpu_throttle_increment = true;
 params->has_max_bandwidth = true;
diff --git a/migration/ram.c b/migration/ram.c
index ed23ed1c7c..28081cb1e1 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -901,6 +901,11 @@ static void migration_bitmap_sync(RAMState *rs)
 RAMBlock *block;
 int64_t end_time;
 uint64_t bytes_xfer_now;
+uint64_t bytes_dirty_period;
+uint64_t bytes_xfer_period;
+uint64_t bytes_dirty_thres;
+uint64_t throttle_trig_thres;
+MigrationState *s = migrate_get_current();
 
 ram_counters.dirty_sync_count++;
 
@@ -934,13 +939,17 @@ static void migration_bitmap_sync(RAMState *rs)
  * throttling logic during the bulk phase of block migration. */
 if (migrate_auto_converge() && 

Re: [PATCH 1/2] riscv: roms: Add 32-bit OpenSBI firmware image for sifive_u

2020-02-20 Thread Bin Meng
Hi Philippe,

On Fri, Feb 21, 2020 at 1:31 AM Philippe Mathieu-Daudé
 wrote:
>
> Hi Bin,
>
> On 2/20/20 3:42 PM, Bin Meng wrote:
> > Although the real world SiFive HiFive Unleashed board is a 64-bit
> > hardware configuration, with QEMU it is possible to test 32-bit
> > configuration with the same hardware features.
> >
> > This updates the roms Makefile to add the build rules for creating
> > the 32-bit OpenSBI firmware image for sifive_u machine. A pre-built
> > OpenSBI image (built from commit 3e7d666) has been added as the
> > default bios for 32-bit sifive_u machine.
>
> With QEMU:
>
> fatal: ambiguous argument '3e7d666': unknown revision or path not in the
> working tree.
>
> This looks like an OpenSBI commit but QEMU only include up to v0.5.
>
> Can you build v0.5? Else can you update the submodule?
>

Will do in v2.

> Also, can you add a CI job to build this, so we have reproducible builds
> (see QEMU commit 71920809ceabed as example)?

I cannot find any documentation on how to test a CI job with GitLab CI. Does
QEMU have a public CI runner for testing?

Regards,
Bin



[PATCH v2] gdbstub: Fix single-step issue by confirming 'vContSupported+' feature to gdb

2020-02-20 Thread Changbin Du
Recently when debugging an arm32 system on qemu, I found sometimes the
single-step command (stepi) is not working. This can be reproduced by
below steps:
 1) start qemu-system-arm -s -S .. and wait for gdb connection.
 2) start gdb and connect to qemu. In my case, gdb gets a wrong value
(0x60) for PC, which is another bug.
 3) After connected, type 'stepi' and expect it will stop at next ins.

But it never stops. This is because:
 1) We don't report the ‘vContSupported’ feature to gdb explicitly, so gdb
thinks we do not support it. In this case, gdb uses a software breakpoint
to emulate single-step.
 2) Since gdb gets a wrong initial value of PC, gdb inserts the
breakpoint at the wrong place (PC+4).

This is not limited to the arm target: Philippe has also encountered it on MIPS.
Probably gdb makes different assumptions for different architectures.

Since we do support the ‘vContSupported’ query command, let's tell gdb that
we support it.

Before this change, gdb sends the 'Z0' packet below to implement single-step:
gdb_handle_packet: Z0,4,4

After this change, gdb sends "vCont;s..", which is expected:
gdb_handle_packet: vCont?
put_packet: vCont;c;C;s;S
gdb_handle_packet: vCont;s:p1.1;c:p1.-1

Signed-off-by: Changbin Du 
Tested-by: Philippe Mathieu-Daudé 

---
v2: polish commit message.
---
 gdbstub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gdbstub.c b/gdbstub.c
index ce304ff482..adccd938e2 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -2111,7 +2111,7 @@ static void handle_query_supported(GdbCmdContext 
*gdb_ctx, void *user_ctx)
 gdb_ctx->s->multiprocess = true;
 }
 
-pstrcat(gdb_ctx->str_buf, sizeof(gdb_ctx->str_buf), ";multiprocess+");
+pstrcat(gdb_ctx->str_buf, sizeof(gdb_ctx->str_buf), 
";vContSupported+;multiprocess+");
 put_packet(gdb_ctx->s, gdb_ctx->str_buf);
 }
 
-- 
2.25.0




Re: [PATCH qemu v7 0/5] spapr: Kill SLOF

2020-02-20 Thread Alexey Kardashevskiy



On 20/02/2020 21:01, Paolo Bonzini wrote:
> On 20/02/20 07:16, Alexey Kardashevskiy wrote:
>> This is another attempt to implement minimalistic
>> Open Firmware Client Interface in QEMU.
>>
>> With this thing, I can boot unmodified Ubuntu 18.04 and Fedora 30
>> directly from the disk without SLOF.
>>
>> A useful discussion happened esrlier:
>> https://lore.kernel.org/qemu-devel/f881c2e7-be92-9695-6e19-2dd88cbc6...@ozlabs.ru/
>>
>> 5/5 is kind of controvertial though. This respin does not include
>> networking.
>>
>> This is based on sha1
>> 015fb0ead60d Chen Qun "hw/ppc/virtex_ml507:fix leak of fdevice tree blob".
> 
> I would like to play with this.  Can you provide a disk image that just
> reads the first sector of the disk using an OpenFirmware read command,
> and dumps it to stdout?

I am not quite sure I understood the request.  Write my own small
firmware and replace GRUB with it? The firmware from 5/5 reads first 2
sectors and the entire PReP, I could add there stuff if that helps (I
have "work in progress" patch for the firmware with printk/etc borrowed
from SLOF).

>  (Also, I lost the pointer to your super-minimal
> pSeries firmware).

It is incorporated into these patches under /pc-bios/vof - 4/5 has the
minimum (may be even too much), 5/5 has MBR+GPT+ELF.



-- 
Alexey



Re: [PATCH] tcg: gdbstub: Fix single-step issue on arm target

2020-02-20 Thread Changbin Du
On Thu, Feb 20, 2020 at 06:47:26PM +0100, Philippe Mathieu-Daudé wrote:
> On 2/20/20 4:58 PM, Changbin Du wrote:
> > Recently when debugging an arm32 system on qemu, I found sometimes the
> > single-step command (stepi) is not working. This can be reproduced by
> > below steps:
> >   1) start qemu-system-arm -s -S .. and wait for gdb connection.
> >   2) start gdb and connect to qemu. In my case, gdb gets a wrong value
> >  (0x60) for PC.
> >   3) After connected, type 'stepi' and expect it will stop at next ins.
> > 
> > But, it has never stopped. This because:
> >   1) We doesn't report ‘vContSupported’ feature to gdb explicitly and gdb
> >  think we do not support it. In this case, gdb use a software breakpoint
> >  to emulate single-step.
> >   2) Since gdb gets a wrong initial value of PC, then gdb inserts a
> >  breakpoint to wrong place (PC+4).
> > 
> > Since we do support ‘vContSupported’ query command, so let's tell gdb that
> > we support it.
> > 
> > Before this change, gdb send below 'Z0' packet to implement single-step:
> > gdb_handle_packet: Z0,4,4
> > 
> > After this change, gdb send "vCont;s.." which is expected:
> > gdb_handle_packet: vCont?
> > put_packet: vCont;c;C;s;S
> > gdb_handle_packet: vCont;s:p1.1;c:p1.-1
> 
> You actually fixed this for all architectures :)
> 
> This has been annoying me on MIPS since more than a year...
> 
> I haven't checked the GDB protocol spec, but so far:
> Tested-by: Philippe Mathieu-Daudé 
>
Thanks for your feedback. :)

-- 
Cheers,
Changbin Du



Re: [PATCH] tcg: gdbstub: Fix single-step issue on arm target

2020-02-20 Thread Changbin Du
On Thu, Feb 20, 2020 at 10:24:37PM +0100, Luc Michel wrote:
> Hi,
> 
> On 2/20/20 4:58 PM, Changbin Du wrote:
> > Recently when debugging an arm32 system on qemu, I found sometimes the
> > single-step command (stepi) is not working. This can be reproduced by
> > below steps:
> >  1) start qemu-system-arm -s -S .. and wait for gdb connection.
> >  2) start gdb and connect to qemu. In my case, gdb gets a wrong value
> > (0x60) for PC.
> >  3) After connected, type 'stepi' and expect it will stop at next ins.
> > 
> > But, it has never stopped. This because:
> >  1) We doesn't report ‘vContSupported’ feature to gdb explicitly and gdb
> > think we do not support it. In this case, gdb use a software breakpoint
> > to emulate single-step.
> >  2) Since gdb gets a wrong initial value of PC, then gdb inserts a
> > breakpoint to wrong place (PC+4).
> > 
> > Since we do support ‘vContSupported’ query command, so let's tell gdb that
> > we support it.
> > 
> > Before this change, gdb send below 'Z0' packet to implement single-step:
> > gdb_handle_packet: Z0,4,4
> > 
> > After this change, gdb send "vCont;s.." which is expected:
> > gdb_handle_packet: vCont?
> > put_packet: vCont;c;C;s;S
> > gdb_handle_packet: vCont;s:p1.1;c:p1.-1
> I'm curious, I never experienced this behaviour from GDB. What GDB and
> QEMU versions are you using?
> 
For QEMU, it's built from mainline.
For GDB, I have tried 8.1 and latest 9.1.

> On my side (GDB 9.1), even without 'vContSupported+' in the 'qSupported'
> answer, GDB sends a 'vCont?' packet on the first stepi:
> 
> 0x in ?? ()
> (gdb) si
> Sending packet: $m0,4#fd...Ack
> Packet received: 
> Sending packet: $vCont?#49...Ack
> Packet received: vCont;c;C;s;S
> Packet vCont (verbose-resume) is supported
> Sending packet: $vCont;s:p1.1;c:p1.-1#f7...Ack
> Packet received: T05thread:p01.01;
>
Hmm, on my side this is 100% reproducible on arm32, but not on aarch64. I
think GDB makes different assumptions for different architectures.

> Your second issue (wrong PC value) should be investigated though. Does
> it happen on QEMU vanilla? Do you have a way to reproduce this bug?
> 
This is also 100% reproducible with the ELF guest I tested, but sorry, I
can't share it. I will probably look into this issue in a few days.

> Anyway after re-reading the GDB remote protocol documentation, I think
> your patch is right, the feature should be advertised.
> 
> However I think your commit message needs some modifications. This fix
> is not specific to ARM or TCG, but to the gdbstub itself. You also
> mention this bug you have with PC, which is not related to the bug you
> are fixing here. Could you rewrite it in a more generic way? You simply
> need to emphasis the effect of advertising the 'vContSupported+' feature
> on GDB.
> 
sure.

> Thanks.
> 
> -- 
> Luc

-- 
Cheers,
Changbin Du



Re: [PATCH v1 0/2] linux-user: generate syscall_nr.sh for RISC-V

2020-02-20 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/cover.1582240656.git.alistair.fran...@wdc.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH v1 0/2]  linux-user: generate syscall_nr.sh for RISC-V
Message-id: cover.1582240656.git.alistair.fran...@wdc.com
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag] patchew/cover.1582240656.git.alistair.fran...@wdc.com -> 
patchew/cover.1582240656.git.alistair.fran...@wdc.com
Switched to a new branch 'test'
a8f95ff linux-user/riscv: Update the syscall_nr's to the 5.5 kernel
8f319fe linux-user: Protect more syscalls

=== OUTPUT BEGIN ===
1/2 Checking commit 8f319fe4044f (linux-user: Protect more syscalls)
ERROR: space prohibited between function name and open parenthesis '('
#75: FILE: linux-user/syscall.c:1088:
+#if defined(TARGET_NR_getrlimit) || defined (TARGET_NR_ugetrlimit)

total: 1 errors, 0 warnings, 121 lines checked

Patch 1/2 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

2/2 Checking commit a8f95ff78d20 (linux-user/riscv: Update the syscall_nr's to 
the 5.5 kernel)
WARNING: Block comments use a leading /* on a separate line
#377: FILE: linux-user/riscv/syscall_nr.h:442:
+/* Alias some of the older pre 64-bit time_t syscalls to the 64-bit

WARNING: Block comments use a trailing */ on a separate line
#378: FILE: linux-user/riscv/syscall_nr.h:443:
+ * ones for RV32. This is based on the list used by glibc. */

total: 0 errors, 2 warnings, 370 lines checked

Patch 2/2 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/cover.1582240656.git.alistair.fran...@wdc.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[Bug 1863333] Re: Assigning NVMe disk to a domain causes VFIO_MAP_DMA errors

2020-02-20 Thread Alex Williamson
This seems to be due to the vfio-helper code assuming it can map an
arbitrarily large IOVA range starting at 64K base address.  x86
processors typically have a reserved range near the top of the 32-bit
address space which is used for MSI support which is used by the
interrupt remapper where we cannot create an overlapping DMA mapping
window.  Therefore once you have something approaching a 4G VM, you'll
see the initial -EINVAL and I assume things fall apart from there.
Changing the base IOVA address in vfio-helpers.c seems to be sufficient,
ex:

#define QEMU_VFIO_IOVA_MIN 0x1ULL

This might be a sufficient legacy fix, but we do now expose valid IOVA
ranges through the VFIO API which would allow this driver to dynamically
pick IOVA ranges based on what the platform actually supports.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1863333

Title:
  Assigning NVMe disk to a domain causes VFIO_MAP_DMA errors

Status in QEMU:
  New

Bug description:
  I'm seeing some errors when assigning my NVMe disk to qemu. This is
  the full command line:

  
  /home/zippy/work/qemu/qemu.git/x86_64-softmmu/qemu-system-x86_64 \
  -name guest=fedora,debug-threads=on \
  -S \
  -object 
secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-2-fedora/master-key.aes
 \
  -machine pc-i440fx-4.1,accel=kvm,usb=off,dump-guest-core=off \
  -cpu host \
  -m size=4194304k,slots=16,maxmem=1099511627776k \
  -overcommit mem-lock=off \
  -smp 4,sockets=1,dies=1,cores=2,threads=2 \
  -object iothread,id=iothread1 \
  -object iothread,id=iothread2 \
  -object iothread,id=iothread3 \
  -object iothread,id=iothread4 \
  -mem-prealloc \
  -mem-path /hugepages2M/libvirt/qemu/2-fedora \
  -numa node,nodeid=0,cpus=0,mem=4096 \
  -uuid 63840878-0deb-4095-97e6-fc444d9bc9fa \
  -no-user-config \
  -nodefaults \
  -chardev socket,id=charmonitor,fd=31,server,nowait \
  -mon chardev=charmonitor,id=monitor,mode=control \
  -rtc base=utc \
  -no-shutdown \
  -global PIIX4_PM.disable_s3=0 \
  -global PIIX4_PM.disable_s4=0 \
  -boot menu=on,strict=on \
  -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
  -device virtio-scsi-pci,id=scsi0,bus=pci.0,addr=0x4 \
  -device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x5 \
  -blockdev 
'{"driver":"file","filename":"/var/lib/libvirt/images/fedora.qcow2","node-name":"libvirt-2-storage","auto-read-only":true,"discard":"unmap"}'
 \
  -blockdev 
'{"node-name":"libvirt-2-format","read-only":false,"discard":"unmap","driver":"qcow2","file":"libvirt-2-storage","backing":null}'
 \
  -device 
scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,device_id=drive-scsi0-0-0-0,drive=libvirt-2-format,id=scsi0-0-0-0,bootindex=1
 \
  -blockdev 
'{"driver":"nvme","device":":02:00.0","namespace":1,"node-name":"libvirt-1-storage","auto-read-only":true,"discard":"unmap"}'
 \
  -blockdev 
'{"node-name":"libvirt-1-format","read-only":false,"driver":"raw","file":"libvirt-1-storage"}'
 \
  -device 
virtio-blk-pci,scsi=off,bus=pci.0,addr=0x6,drive=libvirt-1-format,id=virtio-disk0
 \
  -netdev tap,fd=33,id=hostnet0,vhost=on,vhostfd=34 \
  -device 
virtio-net-pci,host_mtu=9000,netdev=hostnet0,id=net0,mac=52:54:00:a4:6f:91,bus=pci.0,addr=0x3
 \
  -chardev pty,id=charserial0 \
  -device isa-serial,chardev=charserial0,id=serial0 \
  -chardev socket,id=charchannel0,fd=35,server,nowait \
  -device 
virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,id=channel0,name=org.qemu.guest_agent.0
 \
  -spice port=5900,addr=0.0.0.0,disable-ticketing,seamless-migration=on \
  -device virtio-vga,id=video0,virgl=on,max_outputs=1,bus=pci.0,addr=0x2 \
  -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7 \
  -sandbox 
on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \
  -msg timestamp=on

  And these are the errors I see:

  2020-02-14T09:06:18.183167Z qemu-system-x86_64: VFIO_MAP_DMA failed: Invalid 
argument
  2020-02-14T09:10:49.753767Z qemu-system-x86_64: VFIO_MAP_DMA failed: Cannot 
allocate memory
  2020-02-14T09:11:04.530344Z qemu-system-x86_64: VFIO_MAP_DMA failed: No space 
left on device
  2020-02-14T09:11:04.531087Z qemu-system-x86_64: VFIO_MAP_DMA failed: No space 
left on device
  2020-02-14T09:11:04.531230Z qemu-system-x86_64: VFIO_MAP_DMA failed: No space 
left on device

  
  I'm doing nothing with the disk inside the guest, but:

# dd if=/dev/vda of=/dev/null status=progress

  (the disk appears as /dev/vda in the guest). Surprisingly, I do not
  see these errors when I use the traditional PCI assignment (-device
  vfio-pci). My versions of kernel and qemu:

  moe ~ # uname -r
  5.4.15-gentoo
  moe ~ # /home/zippy/work/qemu/qemu.git/x86_64-softmmu/qemu-system-x86_64 
--version
  QEMU emulator version 4.2.50 (v4.2.0-1439-g5d6542bea7-dirty)
  Copyright (c) 2003-2019 Fabrice Bellard and the QEMU Project developers

To manage notifications about this bug go to:

[PATCH v1 1/2] linux-user: Protect more syscalls

2020-02-20 Thread Alistair Francis
New y2038 safe 32-bit architectures (like RISC-V) don't support old
syscalls with a 32-bit time_t. The kernel defines new *_time64 versions
of these syscalls. Add some more #ifdefs to syscall.c in linux-user to
allow us to compile without these old syscalls.

Signed-off-by: Alistair Francis 
---
 linux-user/strace.c  |  2 ++
 linux-user/syscall.c | 18 ++
 2 files changed, 20 insertions(+)

diff --git a/linux-user/strace.c b/linux-user/strace.c
index 3d4d684450..2eb8ae3d31 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -770,6 +770,7 @@ print_syscall_ret_newselect(const struct syscallname *name, 
abi_long ret)
 #define TARGET_TIME_OOP  3   /* leap second in progress */
 #define TARGET_TIME_WAIT 4   /* leap second has occurred */
 #define TARGET_TIME_ERROR5   /* clock not synchronized */
+#ifdef TARGET_NR_adjtimex
 static void
 print_syscall_ret_adjtimex(const struct syscallname *name, abi_long ret)
 {
@@ -808,6 +809,7 @@ print_syscall_ret_adjtimex(const struct syscallname *name, 
abi_long ret)
 
 gemu_log("\n");
 }
+#endif
 
 UNUSED static struct flags access_flags[] = {
 FLAG_GENERIC(F_OK),
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index c930577686..44632a7f6a 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -738,8 +738,10 @@ safe_syscall3(ssize_t, read, int, fd, void *, buff, 
size_t, count)
 safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count)
 safe_syscall4(int, openat, int, dirfd, const char *, pathname, \
   int, flags, mode_t, mode)
+#if defined(TARGET_NR_wait4)
 safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \
   struct rusage *, rusage)
+#endif
 safe_syscall5(int, waitid, idtype_t, idtype, id_t, id, siginfo_t *, infop, \
   int, options, struct rusage *, rusage)
 safe_syscall3(int, execve, const char *, filename, char **, argv, char **, 
envp)
@@ -776,8 +778,10 @@ safe_syscall4(int, rt_sigtimedwait, const sigset_t *, 
these, siginfo_t *, uinfo,
   const struct timespec *, uts, size_t, sigsetsize)
 safe_syscall4(int, accept4, int, fd, struct sockaddr *, addr, socklen_t *, len,
   int, flags)
+#if defined(TARGET_NR_nanosleep)
 safe_syscall2(int, nanosleep, const struct timespec *, req,
   struct timespec *, rem)
+#endif
 #ifdef TARGET_NR_clock_nanosleep
 safe_syscall4(int, clock_nanosleep, const clockid_t, clock, int, flags,
   const struct timespec *, req, struct timespec *, rem)
@@ -1063,6 +1067,7 @@ static inline abi_long host_to_target_rusage(abi_ulong 
target_addr,
 return 0;
 }
 
+#ifdef TARGET_NR_setrlimit
 static inline rlim_t target_to_host_rlim(abi_ulong target_rlim)
 {
 abi_ulong target_rlim_swap;
@@ -1078,7 +1083,9 @@ static inline rlim_t target_to_host_rlim(abi_ulong 
target_rlim)
 
 return result;
 }
+#endif
 
+#if defined(TARGET_NR_getrlimit) || defined (TARGET_NR_ugetrlimit)
 static inline abi_ulong host_to_target_rlim(rlim_t rlim)
 {
 abi_ulong target_rlim_swap;
@@ -1092,6 +1099,7 @@ static inline abi_ulong host_to_target_rlim(rlim_t rlim)
 
 return result;
 }
+#endif
 
 static inline int target_to_host_resource(int code)
 {
@@ -8584,6 +8592,7 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 }
 }
 return ret;
+#if defined(TARGET_NR_gettimeofday)
 case TARGET_NR_gettimeofday:
 {
 struct timeval tv;
@@ -8594,6 +8603,8 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 }
 }
 return ret;
+#endif
+#if defined(TARGET_NR_settimeofday)
 case TARGET_NR_settimeofday:
 {
 struct timeval tv, *ptv = NULL;
@@ -8615,6 +8626,7 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 
 return get_errno(settimeofday(ptv, ptz));
 }
+#endif
 #if defined(TARGET_NR_select)
 case TARGET_NR_select:
 #if defined(TARGET_WANT_NI_OLD_SELECT)
@@ -9260,6 +9272,7 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 return do_syscall(cpu_env, arg1 & 0x, arg2, arg3, arg4, arg5,
   arg6, arg7, arg8, 0);
 #endif
+#if defined(TARGET_NR_wait4)
 case TARGET_NR_wait4:
 {
 int status;
@@ -9287,6 +9300,7 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 }
 }
 return ret;
+#endif
 #ifdef TARGET_NR_swapoff
 case TARGET_NR_swapoff:
 if (!(p = lock_user_string(arg1)))
@@ -9431,6 +9445,7 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 return do_vm86(cpu_env, arg1, arg2);
 #endif
 #endif
+#if defined(TARGET_NR_adjtimex)
 case TARGET_NR_adjtimex:
 {
 struct timex host_buf;
@@ -9446,6 +9461,7 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 }
 }
 return ret;
+#endif
 #if 

[PATCH v1 2/2] linux-user/riscv: Update the syscall_nr's to the 5.5 kernel

2020-02-20 Thread Alistair Francis
Signed-off-by: Alistair Francis 
---
 linux-user/riscv/syscall_nr.h | 160 +-
 1 file changed, 158 insertions(+), 2 deletions(-)

diff --git a/linux-user/riscv/syscall_nr.h b/linux-user/riscv/syscall_nr.h
index 5c87282209..b2b071969b 100644
--- a/linux-user/riscv/syscall_nr.h
+++ b/linux-user/riscv/syscall_nr.h
@@ -10,7 +10,10 @@
 #define TARGET_NR_io_destroy 1
 #define TARGET_NR_io_submit 2
 #define TARGET_NR_io_cancel 3
+#ifndef TARGET_RISCV32
 #define TARGET_NR_io_getevents 4
+#endif
+
 #define TARGET_NR_setxattr 5
 #define TARGET_NR_lsetxattr 6
 #define TARGET_NR_fsetxattr 7
@@ -23,12 +26,17 @@
 #define TARGET_NR_removexattr 14
 #define TARGET_NR_lremovexattr 15
 #define TARGET_NR_fremovexattr 16
+
 #define TARGET_NR_getcwd 17
+
 #define TARGET_NR_lookup_dcookie 18
+
 #define TARGET_NR_eventfd2 19
+
 #define TARGET_NR_epoll_create1 20
 #define TARGET_NR_epoll_ctl 21
 #define TARGET_NR_epoll_pwait 22
+
 #define TARGET_NR_dup 23
 #define TARGET_NR_dup3 24
 #ifdef TARGET_RISCV32
@@ -36,27 +44,35 @@
 #else
 #define TARGET_NR_fcntl 25
 #endif
+
 #define TARGET_NR_inotify_init1 26
 #define TARGET_NR_inotify_add_watch 27
 #define TARGET_NR_inotify_rm_watch 28
+
 #define TARGET_NR_ioctl 29
+
 #define TARGET_NR_ioprio_set 30
 #define TARGET_NR_ioprio_get 31
+
 #define TARGET_NR_flock 32
+
 #define TARGET_NR_mknodat 33
 #define TARGET_NR_mkdirat 34
 #define TARGET_NR_unlinkat 35
 #define TARGET_NR_symlinkat 36
 #define TARGET_NR_linkat 37
-#define TARGET_NR_renameat 38
+
 #define TARGET_NR_umount2 39
 #define TARGET_NR_mount 40
 #define TARGET_NR_pivot_root 41
+
 #define TARGET_NR_nfsservctl 42
+
 #define TARGET_NR_statfs 43
 #define TARGET_NR_fstatfs 44
 #define TARGET_NR_truncate 45
 #define TARGET_NR_ftruncate 46
+
 #define TARGET_NR_fallocate 47
 #define TARGET_NR_faccessat 48
 #define TARGET_NR_chdir 49
@@ -69,9 +85,13 @@
 #define TARGET_NR_openat 56
 #define TARGET_NR_close 57
 #define TARGET_NR_vhangup 58
+
 #define TARGET_NR_pipe2 59
+
 #define TARGET_NR_quotactl 60
+
 #define TARGET_NR_getdents64 61
+
 #ifdef TARGET_RISCV32
 #define TARGET_NR__llseek 62
 #else
@@ -85,53 +105,91 @@
 #define TARGET_NR_pwrite64 68
 #define TARGET_NR_preadv 69
 #define TARGET_NR_pwritev 70
+
 #define TARGET_NR_sendfile 71
+
+#ifndef TARGET_RISCV32
 #define TARGET_NR_pselect6 72
 #define TARGET_NR_ppoll 73
+#endif
+
 #define TARGET_NR_signalfd4 74
+
 #define TARGET_NR_vmsplice 75
 #define TARGET_NR_splice 76
 #define TARGET_NR_tee 77
+
 #define TARGET_NR_readlinkat 78
 #define TARGET_NR_newfstatat 79
 #define TARGET_NR_fstat 80
+
 #define TARGET_NR_sync 81
 #define TARGET_NR_fsync 82
 #define TARGET_NR_fdatasync 83
 #define TARGET_NR_sync_file_range 84
+
 #define TARGET_NR_timerfd_create 85
+#ifndef TARGET_RISCV32
 #define TARGET_NR_timerfd_settime 86
 #define TARGET_NR_timerfd_gettime 87
+#endif
+
+#ifndef TARGET_RISCV32
 #define TARGET_NR_utimensat 88
+#endif
+
 #define TARGET_NR_acct 89
+
 #define TARGET_NR_capget 90
 #define TARGET_NR_capset 91
+
 #define TARGET_NR_personality 92
+
 #define TARGET_NR_exit 93
 #define TARGET_NR_exit_group 94
 #define TARGET_NR_waitid 95
+
 #define TARGET_NR_set_tid_address 96
 #define TARGET_NR_unshare 97
+
+#ifndef TARGET_RISCV32
 #define TARGET_NR_futex 98
+#endif
 #define TARGET_NR_set_robust_list 99
 #define TARGET_NR_get_robust_list 100
+
+#ifndef TARGET_RISCV32
 #define TARGET_NR_nanosleep 101
+#endif
+
 #define TARGET_NR_getitimer 102
 #define TARGET_NR_setitimer 103
+
 #define TARGET_NR_kexec_load 104
+
 #define TARGET_NR_init_module 105
 #define TARGET_NR_delete_module 106
+
 #define TARGET_NR_timer_create 107
+#ifndef TARGET_RISCV32
 #define TARGET_NR_timer_gettime 108
+#endif
 #define TARGET_NR_timer_getoverrun 109
+#ifndef TARGET_RISCV32
 #define TARGET_NR_timer_settime 110
+#endif
 #define TARGET_NR_timer_delete 111
+#ifndef TARGET_RISCV32
 #define TARGET_NR_clock_settime 112
 #define TARGET_NR_clock_gettime 113
 #define TARGET_NR_clock_getres 114
 #define TARGET_NR_clock_nanosleep 115
+#endif
+
 #define TARGET_NR_syslog 116
+
 #define TARGET_NR_ptrace 117
+
 #define TARGET_NR_sched_setparam 118
 #define TARGET_NR_sched_setscheduler 119
 #define TARGET_NR_sched_getscheduler 120
@@ -141,7 +199,10 @@
 #define TARGET_NR_sched_yield 124
 #define TARGET_NR_sched_get_priority_max 125
 #define TARGET_NR_sched_get_priority_min 126
+#ifndef TARGET_RISCV32
 #define TARGET_NR_sched_rr_get_interval 127
+#endif
+
 #define TARGET_NR_restart_syscall 128
 #define TARGET_NR_kill 129
 #define TARGET_NR_tkill 130
@@ -151,9 +212,12 @@
 #define TARGET_NR_rt_sigaction 134
 #define TARGET_NR_rt_sigprocmask 135
 #define TARGET_NR_rt_sigpending 136
+#ifndef TARGET_RISCV32
 #define TARGET_NR_rt_sigtimedwait 137
+#endif
 #define TARGET_NR_rt_sigqueueinfo 138
 #define TARGET_NR_rt_sigreturn 139
+
 #define TARGET_NR_setpriority 140
 #define TARGET_NR_getpriority 141
 #define TARGET_NR_reboot 142
@@ -177,15 +241,23 @@
 #define TARGET_NR_uname 160
 #define 

[PATCH v1 0/2] linux-user: generate syscall_nr.sh for RISC-V

2020-02-20 Thread Alistair Francis
This series updates the RISC-V syscall_nr.sh based on the 5.5 kernel.
There are two parts to this. One is just adding the new syscalls, the
other part is updating the RV32 syscalls to match the fact that RV32 is
a 64-bit time_t architecture (y2038 safe).

We need to make some changes to syscall.c to avoid warnings/errors
when compiling with the new syscalls.

I did some RV32 user space testing after applying these patches. I ran the
glibc testsuite in userspace and I don't see any regressions.

Alistair Francis (2):
  linux-user: Protect more syscalls
  linux-user/riscv: Update the syscall_nr's to the 5.5 kernel

 linux-user/riscv/syscall_nr.h | 160 +-
 linux-user/strace.c   |   2 +
 linux-user/syscall.c  |  18 
 3 files changed, 178 insertions(+), 2 deletions(-)

-- 
2.25.0




Re: [PATCH v2] sh4: Fix PCI ISA IO memory subregion

2020-02-20 Thread Guenter Roeck
Hi Peter,

On Thu, Feb 20, 2020 at 03:06:05PM +, Peter Maydell wrote:
> On Tue, 18 Feb 2020 at 20:10, Guenter Roeck  wrote:
> 
> I'll put this in via target-arm.next, since we don't really
> have a more active sh4-specific tree to send it via.
> 

Thanks for picking up all my patches, and even more thanks for
your patient reviews and for correcting all my errors.

Guenter



Re: [PATCH] tcg: gdbstub: Fix single-step issue on arm target

2020-02-20 Thread Luc Michel
Hi,

On 2/20/20 4:58 PM, Changbin Du wrote:
> Recently when debugging an arm32 system on qemu, I found sometimes the
> single-step command (stepi) is not working. This can be reproduced by
> below steps:
>  1) start qemu-system-arm -s -S .. and wait for gdb connection.
>  2) start gdb and connect to qemu. In my case, gdb gets a wrong value
> (0x60) for PC.
>  3) After connected, type 'stepi' and expect it will stop at next ins.
> 
> But, it has never stopped. This because:
>  1) We doesn't report ‘vContSupported’ feature to gdb explicitly and gdb
> think we do not support it. In this case, gdb use a software breakpoint
> to emulate single-step.
>  2) Since gdb gets a wrong initial value of PC, then gdb inserts a
> breakpoint to wrong place (PC+4).
> 
> Since we do support ‘vContSupported’ query command, so let's tell gdb that
> we support it.
> 
> Before this change, gdb send below 'Z0' packet to implement single-step:
> gdb_handle_packet: Z0,4,4
> 
> After this change, gdb send "vCont;s.." which is expected:
> gdb_handle_packet: vCont?
> put_packet: vCont;c;C;s;S
> gdb_handle_packet: vCont;s:p1.1;c:p1.-1
I'm curious, I never experienced this behaviour from GDB. What GDB and
QEMU versions are you using?

On my side (GDB 9.1), even without 'vContSupported+' in the 'qSupported'
answer, GDB sends a 'vCont?' packet on the first stepi:

0x in ?? ()
(gdb) si
Sending packet: $m0,4#fd...Ack
Packet received: 
Sending packet: $vCont?#49...Ack
Packet received: vCont;c;C;s;S
Packet vCont (verbose-resume) is supported
Sending packet: $vCont;s:p1.1;c:p1.-1#f7...Ack
Packet received: T05thread:p01.01;

Your second issue (wrong PC value) should be investigated though. Does
it happen on QEMU vanilla? Do you have a way to reproduce this bug?

Anyway after re-reading the GDB remote protocol documentation, I think
your patch is right, the feature should be advertised.

However I think your commit message needs some modifications. This fix
is not specific to ARM or TCG, but to the gdbstub itself. You also
mention this bug you have with PC, which is not related to the bug you
are fixing here. Could you rewrite it in a more generic way? You simply
need to emphasize the effect of advertising the 'vContSupported+' feature
on GDB.

Thanks.

-- 
Luc



Re: [PATCH v2 2/2] hw: move timer_new from init() into realize() to avoid memleaks

2020-02-20 Thread Peter Maydell
On Thu, 20 Feb 2020 at 18:52, Philippe Mathieu-Daudé  wrote:
>
> On 2/20/20 6:56 PM, Peter Maydell wrote:
> > On Mon, 17 Feb 2020 at 03:22,  wrote:
> >>
> >> From: Pan Nengyuan 
> >>
> >> There are some memleaks when we call 'device_list_properties'. This patch 
> >> move timer_new from init into realize to fix it.
> >> Meanwhile, do the null check in mos6522_reset() to avoid null deref if we 
> >> move timer_new into realize().
> >>
> >> Reported-by: Euler Robot 
> >> Signed-off-by: Pan Nengyuan 
> >> Reviewed-by: Philippe Mathieu-Daudé 
> >
> >
> >> diff --git a/hw/misc/mos6522.c b/hw/misc/mos6522.c
> >> index 19e154b870..980eda7599 100644
> >> --- a/hw/misc/mos6522.c
> >> +++ b/hw/misc/mos6522.c
> >> @@ -465,11 +465,15 @@ static void mos6522_reset(DeviceState *dev)
> >>   s->timers[0].frequency = s->frequency;
> >>   s->timers[0].latch = 0xffff;
> >>   set_counter(s, &s->timers[0], 0xffff);
> >> -timer_del(s->timers[0].timer);
> >> +if (s->timers[0].timer) {
> >> +timer_del(s->timers[0].timer);
> >> +}
> >>
> >>   s->timers[1].frequency = s->frequency;
> >>   s->timers[1].latch = 0xffff;
> >> -timer_del(s->timers[1].timer);
> >> +if (s->timers[1].timer) {
> >> +timer_del(s->timers[1].timer);
> >> +}
> >>   }
> >
> > What code path calls a device 'reset' method on a device
> > that has not yet been realized ? I wasn't expecting that
> > to be valid...
>
> This is not valid. What I understood while reviewing this patch is on
> reset the timer is removed from the timers list. But this patch miss
> setting timer = NULL in case the device is reset multiple times, here
> can happen a NULL deref.

I should have checked the APIs here.

timer_new() allocates memory and initialises a timer.
timer_del() removes a timer from any list it is on, but
does not deallocate memory. It's the function you call
to stop a timer (and arguably timer_stop() would be a
better name for it).
If you created the timer with timer_new(), then the
code to clean it up is:
 (1) call timer_del() to make sure it's not on any
list of active timers
 (2) call timer_free()

So:
 * the mos6522_reset code is fine as it is
 * if we wanted cleanup code that undoes the timer_new
   then that would be a timer_del() + timer_free().
   This would go in unrealize if the timer_new is put
   in realize, but...
 * ...like the other devices touched in this patch,
   mos6522 isn't user-creatable, so if realize succeeds
   it won't ever be destroyed; so we don't need to
   do that. (This is a little harder to check than
   with most of these devices, since mos6522 is an
   abstract base class for some other devices, but
   I think it's correct.)

Side notes:
 * for new code, rather than using timer_new() or one
   of its sibling functions, prefer timer_init(),
   timer_init_ns(), etc. These take a pointer to a
   pre-existing QEMUTimer, typically one you have
   directly embedded in the device state struct. So
   they don't need to be freed on unrealize (though
   you do still want to make sure the timer is not
   on an active list with timer_del() before the memory
   in the device state struct goes away).
 * maybe timer_free() should call timer_del(),
   rather than obliging the caller to?

thanks
-- PMM



Re: [PATCH v1 13/13] migrate/ram: Tolerate partially changed mappings in postcopy code

2020-02-20 Thread Peter Xu
On Thu, Feb 20, 2020 at 12:24:42PM +0100, David Hildenbrand wrote:
> On 19.02.20 17:17, David Hildenbrand wrote:
> > When we partially change mappings (e.g., mmap over parts of an existing
> > mmap) where we have a userfaultfd handler registered, the handler will
> > implicitly be unregistered from the parts that changed. This is e.g., the
> > case when doing a qemu_ram_remap(), but is also a preparation for RAM
> > blocks with resizable allocations and we're shrinking RAM blocks.
> > 
> > When the mapping is changed and the handler is removed, any waiters are
> > woken up. Trying to place pages will fail. We can simply ignore erors
> > due to that when placing pages - as the mapping changed on the migration
> > destination, also the content is stale. E.g., after shrinking a RAM
> > block, nobody should be using that memory. After doing a
> > qemu_ram_remap(), the old memory is expected to have vanished.
> > 
> > Let's tolerate such errors (but still warn for now) when placing pages.
> > Also, add a comment why unregistering will continue to work even though
> > the mapping might have changed.
> > 
> > Cc: "Dr. David Alan Gilbert" 
> > Cc: Juan Quintela 
> > Cc: Peter Xu 
> > Cc: Andrea Arcangeli 
> > Signed-off-by: David Hildenbrand 
> > ---
> >  migration/postcopy-ram.c | 43 ++--
> >  1 file changed, 37 insertions(+), 6 deletions(-)
> > 
> > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > index c68caf4e42..df9d27c004 100644
> > --- a/migration/postcopy-ram.c
> > +++ b/migration/postcopy-ram.c
> > @@ -506,6 +506,13 @@ static int cleanup_range(RAMBlock *rb, void *opaque)
> >  range_struct.start = (uintptr_t)host_addr;
> >  range_struct.len = length;
> >  
> > +/*
> > + * In case the mapping was partially changed since we enabled userfault
> > + * (esp. when whrinking RAM blocks and we have resizable allocations, 
> > or
> > + * via qemu_ram_remap()), the userfaultfd handler was already removed 
> > for
> > + * the mappings that changed. Unregistering will, however, still work 
> > and
> > + * ignore mappings without a registered handler.
> > + */
> >  if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
> >  error_report("%s: userfault unregister %s", __func__, 
> > strerror(errno));
> >  
> > @@ -1239,10 +1246,28 @@ int postcopy_place_page(MigrationIncomingState 
> > *mis, void *host, void *from,
> >   */
> >  if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) {
> >  int e = errno;
> > -error_report("%s: %s copy host: %p from: %p (size: %zd)",
> > - __func__, strerror(e), host, from, pagesize);
> >  
> > -return -e;
> > +/*
> > + * When the mapping gets partially changed before we try to place 
> > a page
> > + * (esp. when whrinking RAM blocks and we have resizable 
> > allocations, or
> > + * via qemu_ram_remap()), the userfaultfd handler will be removed 
> > and
> > + * placing pages will fail. In that case, any waiter was already 
> > woken
> > + * up when the mapping was changed. We can safely ignore this, as
> > + * mappings that change once we're running on the destination imply
> > + * that memory of these mappings vanishes. Let's still print a 
> > warning
> > + * for now.
> > + *
> 
> After talking to Andrea, on mapping changes, no waiter will be woken up
> automatically. We have to do an UFFDIO_WAKE, which even works when there
> is no longer a handler registered for that reason. Interesting stuff :)

Yes actually it makes sense. :)

Though I do think it should hardly ever happen; otherwise, even if it's
woken up, it'll still try to access that GPA, and KVM will be confused
by that and exit because no memslot was set up for it.  Then I think
it's a fatal VM error.  In other words, I feel like the resizing
should be blocked somehow by that stalled vcpu too (e.g., even if we
want to reboot a Linux guest, it'll sync between vcpus, and the same
goes for bootstrapping).

Btw, I feel like we cannot always depend on the fact that userfaultfd
will disappear by itself if the VMA is unmapped, because even if that's
true, it'll only be true for shrinking memory.  How about extending
memory in the future?  So IIUC if we want to really fix this, we
probably need to take care of uffd register and unregister of changed
memory regions, which AFAIUI can be done inside your newly introduced
resize hook...

We probably need to take care of other things that might be related to
ramblock resizing too in the same notifier.  One I can think of is to
realloc the ramblock.receivedmap; otherwise we could have some bits
cleared forever for shrunk memory (whereas logically, when migration
finishes, that bitmap should be all set).

Thanks,

-- 
Peter Xu




Re: [PATCH v1 10/13] migrate/ram: Handle RAM block resizes during postcopy

2020-02-20 Thread Peter Xu
On Wed, Feb 19, 2020 at 05:17:22PM +0100, David Hildenbrand wrote:
> Resizing while migrating is dangerous and does not work as expected.
> The whole migration code works on the usable_length of ram blocks and does
> not expect this to change at random points in time.
> 
> In the case of postcopy, relying on used_length is racy as soon as the
> guest is running. Also, when used_length changes we might leave the
> uffd handler registered for some memory regions, reject valid pages
> when migrating and fail when sending the recv bitmap to the source.
> 
> Resizing can be trigger *after* (but not during) a reset in
> ACPI code by the guest
> - hw/arm/virt-acpi-build.c:acpi_ram_update()
> - hw/i386/acpi-build.c:acpi_ram_update()
> 
> Let's remember the original used_length in a separate variable and
> use it in relevant postcopy code. Make sure to update it when we resize
> during precopy, when synchronizing the RAM block sizes with the source.
> 
> Cc: "Dr. David Alan Gilbert" 
> Cc: Juan Quintela 
> Cc: Eduardo Habkost 
> Cc: Paolo Bonzini 
> Cc: Igor Mammedov 
> Cc: "Michael S. Tsirkin" 
> Cc: Richard Henderson 
> Cc: Shannon Zhao 
> Cc: Alex Bennée 
> Cc: Peter Xu 
> Signed-off-by: David Hildenbrand 
> ---
>  include/exec/ramblock.h  |  9 +
>  migration/postcopy-ram.c | 15 ---
>  migration/ram.c  | 11 +--
>  3 files changed, 30 insertions(+), 5 deletions(-)
> 
> diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
> index 07d50864d8..0e9e9b346b 100644
> --- a/include/exec/ramblock.h
> +++ b/include/exec/ramblock.h
> @@ -59,6 +59,15 @@ struct RAMBlock {
>   */
>  unsigned long *clear_bmap;
>  uint8_t clear_bmap_shift;
> +
> +/*
> + * RAM block used_length before the guest started running while postcopy
> + * was active. Once the guest is running, used_length can change. Used to
> + * register/unregister uffd handlers and as the size of the recv bitmap.
> + * Receiving any page beyond this length will bail out, as it could not 
> have
> + * been valid on the source.
> + */
> +ram_addr_t postcopy_length;
>  };
>  #endif
>  #endif
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index a36402722b..c68caf4e42 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -17,6 +17,7 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "qemu/rcu.h"
>  #include "exec/target_page.h"
>  #include "migration.h"
>  #include "qemu-file.h"
> @@ -31,6 +32,7 @@
>  #include "qemu/error-report.h"
>  #include "trace.h"
>  #include "hw/boards.h"
> +#include "exec/ramblock.h"
>  
>  /* Arbitrary limit on size of each discard command,
>   * keeps them around ~200 bytes
> @@ -456,6 +458,13 @@ static int init_range(RAMBlock *rb, void *opaque)
>  ram_addr_t length = qemu_ram_get_used_length(rb);
>  trace_postcopy_init_range(block_name, host_addr, offset, length);
>  
> +/*
> + * Save the used_length before running the guest. In case we have to
> + * resize RAM blocks when syncing RAM block sizes from the source during
> + * precopy, we'll update it manually via the ram block notifier.
> + */
> +rb->postcopy_length = length;
> +
>  /*
>   * We need the whole of RAM to be truly empty for postcopy, so things
>   * like ROMs and any data tables built during init must be zero'd
> @@ -478,7 +487,7 @@ static int cleanup_range(RAMBlock *rb, void *opaque)
>  const char *block_name = qemu_ram_get_idstr(rb);
>  void *host_addr = qemu_ram_get_host_addr(rb);
>  ram_addr_t offset = qemu_ram_get_offset(rb);
> -ram_addr_t length = qemu_ram_get_used_length(rb);
> +ram_addr_t length = rb->postcopy_length;
>  MigrationIncomingState *mis = opaque;
>  struct uffdio_range range_struct;
>  trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
> @@ -600,7 +609,7 @@ static int nhp_range(RAMBlock *rb, void *opaque)
>  const char *block_name = qemu_ram_get_idstr(rb);
>  void *host_addr = qemu_ram_get_host_addr(rb);
>  ram_addr_t offset = qemu_ram_get_offset(rb);
> -ram_addr_t length = qemu_ram_get_used_length(rb);
> +ram_addr_t length = rb->postcopy_length;
>  trace_postcopy_nhp_range(block_name, host_addr, offset, length);
>  
>  /*
> @@ -644,7 +653,7 @@ static int ram_block_enable_notify(RAMBlock *rb, void 
> *opaque)
>  struct uffdio_register reg_struct;
>  
>  reg_struct.range.start = (uintptr_t)qemu_ram_get_host_addr(rb);
> -reg_struct.range.len = qemu_ram_get_used_length(rb);
> +reg_struct.range.len = rb->postcopy_length;
>  reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
>  
>  /* Now tell our userfault_fd that it's responsible for this area */
> diff --git a/migration/ram.c b/migration/ram.c
> index ab1f5534cf..6d1dcb362c 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -244,7 +244,7 @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file,
>  return -1;
>  }
>  
> -   

Re: [PATCH v3 01/20] scripts/git.orderfile: Display Cocci scripts before code modifications

2020-02-20 Thread Michael S. Tsirkin
On Thu, Feb 20, 2020 at 02:05:29PM +0100, Philippe Mathieu-Daudé wrote:
> When we use a Coccinelle semantic script to do automatic
> code modifications, it makes sense to look at the semantic
> patch first.
> 
> Signed-off-by: Philippe Mathieu-Daudé 

Reviewed-by: Michael S. Tsirkin 

> ---
>  scripts/git.orderfile | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/scripts/git.orderfile b/scripts/git.orderfile
> index 1f747b583a..7cf22e0bf5 100644
> --- a/scripts/git.orderfile
> +++ b/scripts/git.orderfile
> @@ -22,6 +22,9 @@ Makefile*
>  qapi/*.json
>  qga/*.json
>  
> +# semantic patches
> +*.cocci
> +
>  # headers
>  *.h
>  
> -- 
> 2.21.1




Re: [PULL 06/18] qemu-img: Add --target-is-zero to convert

2020-02-20 Thread David Edmondson
On Thursday, 2020-02-20 at 10:36:04 -06, Eric Blake wrote:

> On 2/20/20 10:06 AM, Max Reitz wrote:
>> From: David Edmondson 
>> 
>> In many cases the target of a convert operation is a newly provisioned
>> target that the user knows is blank (reads as zero). In this situation
>> there is no requirement for qemu-img to wastefully zero out the entire
>> device.
>> 
>> Add a new option, --target-is-zero, allowing the user to indicate that
>> an existing target device will return zeros for all reads.
>> 
>> Signed-off-by: David Edmondson 
>> Message-Id: <20200205110248.2009589-2-david.edmond...@oracle.com>
>> Reviewed-by: Vladimir Sementsov-Ogievskiy 
>> Reviewed-by: Eric Blake 
>> Signed-off-by: Max Reitz 
>> ---
>>   docs/interop/qemu-img.rst |  9 -
>>   qemu-img-cmds.hx  |  4 ++--
>>   qemu-img.c| 26 +++---
>>   3 files changed, 33 insertions(+), 6 deletions(-)
>> 
>> diff --git a/docs/interop/qemu-img.rst b/docs/interop/qemu-img.rst
>> index 42e4451db4..5f40137c10 100644
>> --- a/docs/interop/qemu-img.rst
>> +++ b/docs/interop/qemu-img.rst
>> @@ -214,6 +214,13 @@ Parameters to convert subcommand:
>> will still be printed.  Areas that cannot be read from the source will be
>> treated as containing only zeroes.
>>   
>> +.. option:: --target-is-zero
>> +
>> +  Assume that reading the destination image will always return
>> +  zeros. This parameter is mutually exclusive with a destination image
>
> Late tweak now that this is in a pull request, so we may want a followup 
> patch, but:
>
> The image doesn't always return zeros after we write to it, maybe we 
> should tweak this sentence:
>
> Assume that reading the destination image will initially return all zeros.

I will send a patch for this.

> Also, my earlier comment about 'zeroes' one line before 'zeros' still 
> applies - although both spellings are valid, we look inconsistent when 
> we can't make up our mind within two adjacent paragraphs.

If we can agree on one of "zeros" or "zeroes" then I'm happy to send a
patch making it consistent everywhere.

I think that given there are existing functions with "zeroes" in the
name, I'd be inclined to go that way.

dme.
-- 
Why stay in college? Why go to night school?



Re: [PATCH v1 05/13] migrate/ram: Handle RAM block resizes during precopy

2020-02-20 Thread Peter Xu
On Thu, Feb 20, 2020 at 04:16:02PM +0100, David Hildenbrand wrote:
> On 19.02.20 17:17, David Hildenbrand wrote:
> > Resizing while migrating is dangerous and does not work as expected.
> > The whole migration code works on the usable_length of ram blocks and does
> > not expect this to change at random points in time.
> > 
> > In the case of precopy, the ram block size must not change on the source,
> > after syncing the RAM block list in ram_save_setup(), so as long as the
> > guest is still running on the source.
> > 
> > Resizing can be trigger *after* (but not during) a reset in
> > ACPI code by the guest
> > - hw/arm/virt-acpi-build.c:acpi_ram_update()
> > - hw/i386/acpi-build.c:acpi_ram_update()
> > 
> > Use the ram block notifier to get notified about resizes. Let's simply
> > cancel migration and indicate the reason. We'll continue running on the
> > source. No harm done.
> > 
> > Update the documentation. Postcopy will be handled separately.
> > 
> > Cc: "Dr. David Alan Gilbert" 
> > Cc: Juan Quintela 
> > Cc: Eduardo Habkost 
> > Cc: Paolo Bonzini 
> > Cc: Igor Mammedov 
> > Cc: "Michael S. Tsirkin" 
> > Cc: Richard Henderson 
> > Cc: Shannon Zhao 
> > Cc: Alex Bennée 
> > Cc: Peter Xu 
> > Signed-off-by: David Hildenbrand 
> > ---
> >  exec.c|  5 +++--
> >  include/exec/memory.h | 10 ++
> >  migration/migration.c |  9 +++--
> >  migration/migration.h |  1 +
> >  migration/ram.c   | 41 +
> >  5 files changed, 58 insertions(+), 8 deletions(-)
> > 
> > diff --git a/exec.c b/exec.c
> > index b75250e773..8b015821d6 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -2120,8 +2120,9 @@ static int memory_try_enable_merging(void *addr, 
> > size_t len)
> >  return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
> >  }
> >  
> > -/* Only legal before guest might have detected the memory size: e.g. on
> > - * incoming migration, or right after reset.
> > +/*
> > + * Resizing RAM while migrating can result in the migration being canceled.
> > + * Care has to be taken if the guest might have already detected the 
> > memory.
> >   *
> >   * As memory core doesn't know how is memory accessed, it is up to
> >   * resize callback to update device state and/or add assertions to detect
> > diff --git a/include/exec/memory.h b/include/exec/memory.h
> > index e85b7de99a..de111347e8 100644
> > --- a/include/exec/memory.h
> > +++ b/include/exec/memory.h
> > @@ -113,7 +113,7 @@ typedef struct IOMMUNotifier IOMMUNotifier;
> >  #define RAM_SHARED (1 << 1)
> >  
> >  /* Only a portion of RAM (used_length) is actually used, and migrated.
> > - * This used_length size can change across reboots.
> > + * Resizing RAM while migrating can result in the migration being canceled.
> >   */
> >  #define RAM_RESIZEABLE (1 << 2)
> >  
> > @@ -843,7 +843,9 @@ void 
> > memory_region_init_ram_shared_nomigrate(MemoryRegion *mr,
> >   * RAM.  Accesses into the region will
> >   * modify memory directly.  Only an 
> > initial
> >   * portion of this RAM is actually 
> > used.
> > - * The used size can change across 
> > reboots.
> > + * Changing the size while migrating
> > + * can result in the migration being
> > + * canceled.
> >   *
> >   * @mr: the #MemoryRegion to be initialized.
> >   * @owner: the object that tracks the region's reference count
> > @@ -1464,8 +1466,8 @@ void *memory_region_get_ram_ptr(MemoryRegion *mr);
> >  
> >  /* memory_region_ram_resize: Resize a RAM region.
> >   *
> > - * Only legal before guest might have detected the memory size: e.g. on
> > - * incoming migration, or right after reset.
> > + * Resizing RAM while migrating can result in the migration being canceled.
> > + * Care has to be taken if the guest might have already detected the 
> > memory.
> >   *
> >   * @mr: a memory region created with @memory_region_init_resizeable_ram.
> >   * @newsize: the new size the region
> > diff --git a/migration/migration.c b/migration/migration.c
> > index 8fb68795dc..ac9751dbe5 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -175,13 +175,18 @@ void migration_object_init(void)
> >  }
> >  }
> >  
> > +void migration_cancel(void)
> > +{
> > +migrate_fd_cancel(current_migration);
> > +}
> > +
> >  void migration_shutdown(void)
> >  {
> >  /*
> >   * Cancel the current migration - that will (eventually)
> >   * stop the migration using this structure
> >   */
> > -migrate_fd_cancel(current_migration);
> > +migration_cancel();
> >  object_unref(OBJECT(current_migration));
> >  }
> >  
> > @@ -2019,7 +2024,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool 
> > blk,
> >  
> >  void qmp_migrate_cancel(Error **errp)
> >  {
> 

Re: [RFC PATCH v2] target/ppc: Enable hardfloat for PPC

2020-02-20 Thread Richard Henderson
On 2/18/20 9:10 AM, BALATON Zoltan wrote:
>  void helper_reset_fpstatus(CPUPPCState *env)
>  {
> -set_float_exception_flags(0, &env->fp_status);
> +set_float_exception_flags(env->default_fp_excpt_flags, &env->fp_status);
>  }

What I don't like is the forced setting of inexact.  I don't mind leaving it
set if it is already set, which corresponds to the normal accumulation of
exceptions.

In addition, if the inexact exception is unmasked, I would expect a signal to
be delivered only when an inexact exception happens.  Whereas this patch would
deliver a signal for every fp operation.

It should be just as easy to do

flags = get_float_exception_flags(status);
flags &= env->save_fp_exception_flags;
set_float_exception_flags(flags, status);


> +DEFINE_PROP_BOOL("hardfloat", PowerPCCPU, hardfloat, true),

I would also prefer a different name here -- perhaps x-no-fp-fi.


r~



Re: [PATCH] console: make QMP screendump use coroutine

2020-02-20 Thread Dr. David Alan Gilbert
* Markus Armbruster (arm...@redhat.com) wrote:
> Cc: David for questions regarding the HMP core.  David, please look for
> "Is HMP blocking the main loop a problem?"
> 
> Marc-André Lureau  writes:
> 
> > Hi
> >
> > On Thu, Feb 20, 2020 at 8:49 AM Markus Armbruster  wrote:
> >>
> >> Marc-André Lureau  writes:
> >>
> >> > Thanks to the QMP coroutine support, the screendump handler can
> >> > trigger a graphic_hw_update(), yield and let the main loop run until
> >> > update is done. Then the handler is resumed, and the ppm_save() will
> >> > write the screen image to disk in the coroutine context (thus
> >> > non-blocking).
> >> >
> >> > For now, HMP doesn't have coroutine support, so it remains potentially
> >> > outdated or glitched.
> >> >
> >> > Fixes:
> >> > https://bugzilla.redhat.com/show_bug.cgi?id=1230527
> >> >
> >> > Based-on: <20200109183545.27452-2-kw...@redhat.com>
> >> >
> >> > Cc: Kevin Wolf 
> >> > Signed-off-by: Marc-André Lureau 
> >> > ---
> >> >  qapi/ui.json|  3 ++-
> >> >  ui/console.c| 35 +++
> >> >  ui/trace-events |  2 +-
> >> >  3 files changed, 30 insertions(+), 10 deletions(-)
> >> >
> >> > diff --git a/qapi/ui.json b/qapi/ui.json
> >> > index e04525d8b4..d941202f34 100644
> >> > --- a/qapi/ui.json
> >> > +++ b/qapi/ui.json
> >> > @@ -96,7 +96,8 @@
> >> >  #
> >> >  ##
> >> >  { 'command': 'screendump',
> >> > -  'data': {'filename': 'str', '*device': 'str', '*head': 'int'} }
> >> > +  'data': {'filename': 'str', '*device': 'str', '*head': 'int'},
> >> > +  'coroutine': true }
> >> >
> >> >  ##
> >> >  # == Spice
> >> > diff --git a/ui/console.c b/ui/console.c
> >> > index ac79d679f5..db184b473f 100644
> >> > --- a/ui/console.c
> >> > +++ b/ui/console.c
> >> > @@ -167,6 +167,7 @@ struct QemuConsole {
> >> >  QEMUFIFO out_fifo;
> >> >  uint8_t out_fifo_buf[16];
> >> >  QEMUTimer *kbd_timer;
> >> > +Coroutine *screendump_co;
> >> >
> >> >  QTAILQ_ENTRY(QemuConsole) next;
> >> >  };
> >> > @@ -194,7 +195,6 @@ static void dpy_refresh(DisplayState *s);
> >> >  static DisplayState *get_alloc_displaystate(void);
> >> >  static void text_console_update_cursor_timer(void);
> >> >  static void text_console_update_cursor(void *opaque);
> >> > -static bool ppm_save(int fd, DisplaySurface *ds, Error **errp);
> >> >
> >> >  static void gui_update(void *opaque)
> >> >  {
> >> > @@ -263,6 +263,9 @@ static void gui_setup_refresh(DisplayState *ds)
> >> >
> >> >  void graphic_hw_update_done(QemuConsole *con)
> >> >  {
> >> > +if (con && con->screendump_co) {
> >>
> >> How can !con happen?
> >
> > I don't think it can happen anymore (the patch evolved over several
> > years, this is probably a left-over). In any case, it doesn't hurt.
> 
> I hate such dead checks, because they make me assume they can actually
> happen.  Incorrect assumptions breed bugs.
> 
> But I'm willing to defer to the maintainer here.  Gerd?
> 
> >> > +aio_co_wake(con->screendump_co);
> >> > +}
> >> >  }
> >> >
> >> >  void graphic_hw_update(QemuConsole *con)
> >> > @@ -310,16 +313,16 @@ void graphic_hw_invalidate(QemuConsole *con)
> >> >  }
> >> >  }
> >> >
> >> > -static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
> >> > +static bool ppm_save(int fd, pixman_image_t *image, Error **errp)
> >> >  {
> >> > -int width = pixman_image_get_width(ds->image);
> >> > -int height = pixman_image_get_height(ds->image);
> >> > +int width = pixman_image_get_width(image);
> >> > +int height = pixman_image_get_height(image);
> >> >  g_autoptr(Object) ioc = OBJECT(qio_channel_file_new_fd(fd));
> >> >  g_autofree char *header = NULL;
> >> >  g_autoptr(pixman_image_t) linebuf = NULL;
> >> >  int y;
> >> >
> >> > -trace_ppm_save(fd, ds);
> >> > +trace_ppm_save(fd, image);
> >> >
> >> >  header = g_strdup_printf("P6\n%d %d\n%d\n", width, height, 255);
> >> >  if (qio_channel_write_all(QIO_CHANNEL(ioc),
> >> > @@ -329,7 +332,7 @@ static bool ppm_save(int fd, DisplaySurface *ds, 
> >> > Error **errp)
> >> >
> >> >  linebuf = qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width);
> >> >  for (y = 0; y < height; y++) {
> >> > -qemu_pixman_linebuf_fill(linebuf, ds->image, width, 0, y);
> >> > +qemu_pixman_linebuf_fill(linebuf, image, width, 0, y);
> >> >  if (qio_channel_write_all(QIO_CHANNEL(ioc),
> >> >(char 
> >> > *)pixman_image_get_data(linebuf),
> >> >pixman_image_get_stride(linebuf), 
> >> > errp) < 0) {
> >>
> >> Looks like an unrelated optimization / simplification.  If I was
> >> maintainer, I'd ask for a separate patch.
> >
> > I can be split, but it's related. We should pass a reference to
> > pixman_image_t, rather than a pointer to DisplaySurface, as the
> > underlying image may change over time, and would result in corrupted
> > coroutine save or worse.
> 
> Work that into your commit message, 

Re: [PATCH v9 2/3] Acceptance test: add "boot_linux" tests

2020-02-20 Thread Cleber Rosa
On Thu, Feb 20, 2020 at 01:49:40PM -0300, Wainer dos Santos Moschetta wrote:
> On 2/19/20 11:06 PM, Cleber Rosa wrote:
> > +
> > +def test_virt_tcg(self):
> > +"""
> > +:avocado: tags=accel:tcg
> > +:avocado: tags=cpu:cortex-a53
> > +"""
> > +if not tcg_available(self.qemu_bin):
> > +self.cancel(TCG_NOT_AVAILABLE)
> > +self.vm.add_args("-accel", "tcg")
> > +self.vm.add_args('-cpu', 'cortex-a53')
> > +self.add_common_args()
> > +self.launch_and_wait()
> > +
> > +def test_virt_kvm(self):
> > +"""
> > +:avocado: tags=accel:kvm
> > +:avocado: tags=cpu:host
> > +"""
> > +if not kvm_available(self.arch, self.qemu_bin):
> > +self.cancel(KVM_NOT_AVAILABLE)
> > +self.vm.add_args("-accel", "kvm")
> > +self.vm.add_args("-cpu", "host")
> > +self.add_common_args()
> > +self.launch_and_wait()
> 
> 
> For aarch64 tests it seems '-cpu max' is the best choice. See in
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg672755.html
> 
>

+drew

Thanks for pointing that out.  There's one thing, though, which I can
not agree on.  And I know that Drew is an expert on the matter, which
makes it harder to disagree on... but, I've got results which clearly
indicate that *not using* the gic-version machine parameter still gets
me KVM:

./tests/venv/bin/avocado run 
tests/acceptance/boot_linux.py:BootLinuxAarch64.test_virt_kvm
JOB ID : 21a394b884b474ceee0a045b3e74f98da0aee023
JOB LOG: 
/home/cleber/avocado/job-results/job-2020-02-20T14.28-21a394b/job.log
 (1/1) tests/acceptance/boot_linux.py:BootLinuxAarch64.test_virt_kvm: PASS 
(35.10 s)
RESULTS: PASS 1 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 0 | CANCEL 0
JOB TIME   : 35.87 s

VM launch command:
   aarch64-softmmu/qemu-system-aarch64
   -display none
   -vga none
   -chardev socket,id=mon,path=/var/tmp/tmpntz_r_h7/qemu-18331-monitor.sock
   -mon chardev=mon,mode=control
   -machine virt
   -chardev 
socket,id=console,path=/var/tmp/tmpntz_r_h7/qemu-18331-console.sock,server,nowait
   -serial chardev:console
   -smp 2
   -m 1024
   -drive 
file=/var/tmp/avocado_u9jm04di/avocado_job_28oth9kk/1-tests_acceptance_boot_linux.py_BootLinuxAarch64.test_virt_kvm/Fedora-Cloud-Base-31-1.9.aarch64-05265df5.qcow2
 -drive 
file=/var/tmp/avocado_u9jm04di/avocado_job_28oth9kk/1-tests_acceptance_boot_linux.py_BootLinuxAarch64.test_virt_kvm/cloudinit.iso,format=raw
   -accel kvm
   -cpu host
   -bios /home/cleber/build/qemu/pc-bios/edk2-aarch64-code.fd
   -device virtio-rng-pci,rng=rng0
   -object rng-random,id=rng0,filename=/dev/urandom

Guest boot messages shows:
[1.538955] systemd[1]: Detected virtualization kvm.
[1.539828] systemd[1]: Detected architecture arm64.

This is in contrast with:

./tests/venv/bin/avocado run 
tests/acceptance/boot_linux.py:BootLinuxAarch64.test_virt_tcg 
JOB ID : 90b9412f700e52428b59e97719496c30b4f54435
JOB LOG: 
/home/cleber/avocado/job-results/job-2020-02-20T14.32-90b9412/job.log
 (1/1) tests/acceptance/boot_linux.py:BootLinuxAarch64.test_virt_tcg: PASS 
(581.14 s)
RESULTS: PASS 1 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 0 | CANCEL 0
JOB TIME   : 581.93 s

VM launch command:
   aarch64-softmmu/qemu-system-aarch64
   -display none
   -vga none
   -chardev socket,id=mon,path=/var/tmp/tmpa6i4livg/qemu-18498-monitor.sock
   -mon chardev=mon,mode=control
   -machine virt
   -chardev 
socket,id=console,path=/var/tmp/tmpa6i4livg/qemu-18498-console.sock,server,nowait
   -serial chardev:console
   -smp 2
   -m 1024
   -drive 
file=/var/tmp/avocado_slcj2x9e/avocado_job_x5u__309/1-tests_acceptance_boot_linux.py_BootLinuxAarch64.test_virt_tcg/Fedora-Cloud-Base-31-1.9.aarch64-5b006a2f.qcow2
 -drive 
file=/var/tmp/avocado_slcj2x9e/avocado_job_x5u__309/1-tests_acceptance_boot_linux.py_BootLinuxAarch64.test_virt_tcg/cloudinit.iso,format=raw
   -accel tcg
   -cpu cortex-a53
   -bios /home/cleber/build/qemu/pc-bios/edk2-aarch64-code.fd
   -device virtio-rng-pci,rng=rng0
   -object rng-random,id=rng0,filename=/dev/urandom'

Guest boot messages shows:
[   28.606310] systemd[1]: Detected virtualization qemu.
[   28.607861] systemd[1]: Detected architecture arm64.

And with regards to the CPU type, IIRC, "max" will fallback to the
best CPU on TCG mode.  As a general best practice in testing, I'd
rather not have this dynamic aspect where we can avoid it.  Looks like
with TCG we can set it to one CPU and validate that the guests work on
that configuration.

IIUC, by using either "-cpu host" or "-cpu max" for KVM, we may end up
having the same test PASS or FAIL because of the (dynamic) host CPU.
That's not ideal for testing purposes, but given it's outside of our
control, the best we can do is keep track of the host CPU (via Avocado's
sysinfo collection).

Also, I've used the same CPU model that has been used on

Re: The issues about architecture of the COLO checkpoint

2020-02-20 Thread Dr. David Alan Gilbert
* Daniel Cho (daniel...@qnap.com) wrote:
> Hi Hailiang,
> 
> I have already patched the file to my branch, but there is a problem while
> doing migration.
> Here is the error message from SVM
> "qemu-system-x86_64: /root/download/qemu-4.1.0/memory.c:1079:
> memory_region_transaction_commit: Assertion `qemu_mutex_iothread_locked()'
> failed."

It's probably worth getting the full backtrace.

Dave

> Do you have this problem?
> 
> Best regards,
> Daniel Cho
> 
> Daniel Cho  於 2020年2月20日 週四 上午11:49寫道:
> 
> > Hi Zhang,
> >
> > Thanks, I will configure on code for testing first.
> > However, if you have free time, could you please send the patch file to
> > us, Thanks.
> >
> > Best Regard,
> > Daniel Cho
> >
> >
> > Zhang, Chen  於 2020年2月20日 週四 上午11:07寫道:
> >
> >>
> >> On 2/18/2020 5:22 PM, Daniel Cho wrote:
> >>
> >> Hi Hailiang,
> >> Thanks for your help. If we have any problems we will contact you for
> >> your favor.
> >>
> >>
> >> Hi Zhang,
> >>
> >> " If colo-compare got a primary packet without related secondary packet
> >> in a certain time , it will automatically trigger checkpoint.  "
> >> As you said, the colo-compare will trigger checkpoint, but does it need
> >> to limit checkpoint times?
> >> There is a problem about doing many checkpoints while we use fio to
> >> random write files. Then it will cause low throughput on PVM.
> >> Is this situation normal on COLO?
> >>
> >>
> >> Hi Daniel,
> >>
> >> The checkpoint time is designed to be user adjustable based on user
> >> environment(workload/network status/business conditions...).
> >>
> >> In net/colo-compare.c
> >>
> >> /* TODO: Should be configurable */
> >> #define REGULAR_PACKET_CHECK_MS 3000
> >>
> >> If you need, I can send a patch for this issue. Make users can change the
> >> value by QMP and qemu monitor commands.
> >>
> >> Thanks
> >>
> >> Zhang Chen
> >>
> >>
> >>
> >> Best regards,
> >> Daniel Cho
> >>
> >> Zhang, Chen  於 2020年2月17日 週一 下午1:36寫道:
> >>
> >>>
> >>> On 2/15/2020 11:35 AM, Daniel Cho wrote:
> >>>
> >>> Hi Dave,
> >>>
> >>> Yes, I agree with you, it does need a timeout.
> >>>
> >>>
> >>> Hi Daniel and Dave,
> >>>
> >>> Current colo-compare already have the timeout mechanism.
> >>>
> >>> Named packet_check_timer,  It will scan primary packet queue to make
> >>> sure all the primary packet not stay too long time.
> >>>
> >>> If colo-compare got a primary packet without related secondary packet in
> >>> a certain time , it will automatic trigger checkpoint.
> >>>
> >>> https://github.com/qemu/qemu/blob/master/net/colo-compare.c#L847
> >>>
> >>>
> >>> Thanks
> >>>
> >>> Zhang Chen
> >>>
> >>>
> >>>
> >>> Hi Hailiang,
> >>>
> >>> We base on qemu-4.1.0 for using COLO feature, in your patch, we found a
> >>> lot of difference  between your version and ours.
> >>> Could you give us a latest release version which is close your
> >>> developing code?
> >>>
> >>> Thanks.
> >>>
> >>> Regards
> >>> Daniel Cho
> >>>
> >>> Dr. David Alan Gilbert  於 2020年2月13日 週四 下午6:38寫道:
> >>>
>  * Daniel Cho (daniel...@qnap.com) wrote:
>  > Hi Hailiang,
>  >
>  > 1.
>  > OK, we will try the patch
>  > “0001-COLO-Optimize-memory-back-up-process.patch”,
>  > and thanks for your help.
>  >
>  > 2.
>  > We understand the reason to compare PVM and SVM's packet.
>  However, the
>  > empty of SVM's packet queue might happened on setting COLO feature
>  and SVM
>  > broken.
>  >
>  > On situation 1 ( setting COLO feature ):
>  > We could force do checkpoint after setting COLO feature finish,
>  then it
>  > will protect the state of PVM and SVM . As the Zhang Chen said.
>  >
>  > On situation 2 ( SVM broken ):
>  > COLO will do failover for PVM, so it might not cause any wrong on
>  PVM.
>  >
>  > However, those situations are our views, so there might be a big
>  difference
>  > between reality and our views.
>  > If we have any wrong views and opinions, please let us know, and
>  correct
>  > us.
> 
>  It does need a timeout; the SVM being broken or being in a state where
>  it never sends the corresponding packet (because of a state difference)
>  can happen and COLO needs to timeout when the packet hasn't arrived
>  after a while and trigger the checkpoint.
> 
>  Dave
> 
>  > Thanks.
>  >
>  > Best regards,
>  > Daniel Cho
>  >
>  > Zhang, Chen  於 2020年2月13日 週四 上午10:17寫道:
>  >
>  > > Add cc Jason Wang, he is a network expert.
>  > >
>  > > In case some network things goes wrong.
>  > >
>  > >
>  > >
>  > > Thanks
>  > >
>  > > Zhang Chen
>  > >
>  > >
>  > >
>  > > *From:* Zhang, Chen
>  > > *Sent:* Thursday, February 13, 2020 10:10 AM
>  > > *To:* 'Zhanghailiang' ; Daniel Cho
>  <
>  > > daniel...@qnap.com>
>  > > *Cc:* Dr. David Alan Gilbert ;
>  qemu-devel@nongnu.org
>  > > 

Re: [PATCH v1 1/4] Acceptance test: add address as param

2020-02-20 Thread Wainer dos Santos Moschetta

Hi Oksana,

On 2/14/20 12:52 PM, Oksana Vohchana wrote:

Provides param address in _get_free_port()
because by default it takes free port only on the localhost

Signed-off-by: Oksana Vohchana 
---
  tests/acceptance/migration.py | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/acceptance/migration.py b/tests/acceptance/migration.py
index a8367ca023..e4c39b85a1 100644
--- a/tests/acceptance/migration.py
+++ b/tests/acceptance/migration.py
@@ -52,8 +52,8 @@ class Migration(Test):
  source_vm.qmp('migrate', uri=src_uri)
  self.assert_migration(source_vm, dest_vm)
  
-def _get_free_port(self):

-port = network.find_free_port()
+def _get_free_port(self, address='localhost'):
+port = network.find_free_port(address=address)


Looks good to me, but:

- This patch is out of order, it should be followed by patches 03 and 04.

- You could have described in the cover letter (or in the commit) your 
intention with this change.


Reviewed-by: Wainer dos Santos Moschetta 

Thanks!

- Wainer


  if port is None:
  self.cancel('Failed to find a free port')
  return port





Re: [PATCH v1 07/13] migrate/ram: Get rid of "place_source" in ram_load_postcopy()

2020-02-20 Thread Dr. David Alan Gilbert
* David Hildenbrand (da...@redhat.com) wrote:
> 
> 
> > Am 19.02.2020 um 21:47 schrieb Peter Xu :
> > 
> > On Wed, Feb 19, 2020 at 05:17:19PM +0100, David Hildenbrand wrote:
> >> It's always the same value.
> > 
> > I guess not, because...
> > 
> >> 
> >> Cc: "Dr. David Alan Gilbert" 
> >> Cc: Juan Quintela 
> >> Cc: Peter Xu 
> >> Signed-off-by: David Hildenbrand 
> >> ---
> >> migration/ram.c | 8 +++-
> >> 1 file changed, 3 insertions(+), 5 deletions(-)
> >> 
> >> diff --git a/migration/ram.c b/migration/ram.c
> >> index cbd54947fb..75014717f6 100644
> >> --- a/migration/ram.c
> >> +++ b/migration/ram.c
> >> @@ -3119,7 +3119,6 @@ static int ram_load_postcopy(QEMUFile *f)
> >> ram_addr_t addr;
> >> void *host = NULL;
> >> void *page_buffer = NULL;
> >> -void *place_source = NULL;
> >> RAMBlock *block = NULL;
> >> uint8_t ch;
> >> int len;
> >> @@ -3188,7 +3187,6 @@ static int ram_load_postcopy(QEMUFile *f)
> >> place_needed = true;
> >> target_pages = 0;
> >> }
> >> -place_source = postcopy_host_page;
> >> }
> >> 
> >> switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
> >> @@ -3220,7 +3218,7 @@ static int ram_load_postcopy(QEMUFile *f)
> >>  * buffer to make sure the buffer is valid when
> >>  * placing the page.
> >>  */
> >> -qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
> > 
> > ... it can be modified inside the call.
> 
> Very right, will drop this patch! Thanks!
> 
> > 
> > I feel like this patch could even fail the QEMU unit test.  It would
> > be good to mention what tests have been carried out in the cover
> > letter or with RFC tag if no test is done yet.
> 
> I test all code I share. This survives „make check“. I assume all tests send 
> small pages where „matches_target_page_size==true“, so the tests did not 
> catch this.
> 
> I even spent the last day getting avocado-vt to work and ran multiple 
> (obviously not all) migration tests, including postcopy, so your suggestions 
> have already been considered ...

A test on Power or aarch might catch this one; where they normally
have larger pages.

Dave

> Could have mentioned that in the cover letter, yes.
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [PATCH v2 2/2] hw: move timer_new from init() into realize() to avoid memleaks

2020-02-20 Thread Philippe Mathieu-Daudé

On 2/20/20 6:56 PM, Peter Maydell wrote:

On Mon, 17 Feb 2020 at 03:22,  wrote:


From: Pan Nengyuan 

There are some memleaks when we call 'device_list_properties'. This patch move 
timer_new from init into realize to fix it.
Meanwhile, do the null check in mos6522_reset() to avoid null deref if we move 
timer_new into realize().

Reported-by: Euler Robot 
Signed-off-by: Pan Nengyuan 
Reviewed-by: Philippe Mathieu-Daudé 




diff --git a/hw/misc/mos6522.c b/hw/misc/mos6522.c
index 19e154b870..980eda7599 100644
--- a/hw/misc/mos6522.c
+++ b/hw/misc/mos6522.c
@@ -465,11 +465,15 @@ static void mos6522_reset(DeviceState *dev)
  s->timers[0].frequency = s->frequency;
  s->timers[0].latch = 0xffff;
  set_counter(s, &s->timers[0], 0xffff);
-timer_del(s->timers[0].timer);
+if (s->timers[0].timer) {
+timer_del(s->timers[0].timer);
+}

  s->timers[1].frequency = s->frequency;
  s->timers[1].latch = 0xffff;
-timer_del(s->timers[1].timer);
+if (s->timers[1].timer) {
+timer_del(s->timers[1].timer);
+}
  }


What code path calls a device 'reset' method on a device
that has not yet been realized ? I wasn't expecting that
to be valid...


This is not valid. What I understood while reviewing this patch is that on 
reset the timer is removed from the timers list. But this patch misses 
setting timer = NULL, so if the device is reset multiple times a NULL 
deref can happen here.




thanks
-- PMM






Re: [PATCH] tcg: gdbstub: Fix single-step issue on arm target

2020-02-20 Thread Philippe Mathieu-Daudé

On 2/20/20 7:06 PM, Laurent Vivier wrote:

Le 20/02/2020 à 18:47, Philippe Mathieu-Daudé a écrit :

On 2/20/20 4:58 PM, Changbin Du wrote:

Recently when debugging an arm32 system on qemu, I found sometimes the
single-step command (stepi) is not working. This can be reproduced by
below steps:
   1) start qemu-system-arm -s -S .. and wait for gdb connection.
   2) start gdb and connect to qemu. In my case, gdb gets a wrong value
  (0x60) for PC.
   3) After connected, type 'stepi' and expect it will stop at next ins.

But, it has never stopped. This because:
   1) We doesn't report ‘vContSupported’ feature to gdb explicitly and gdb
  think we do not support it. In this case, gdb use a software
breakpoint
  to emulate single-step.
   2) Since gdb gets a wrong initial value of PC, then gdb inserts a
  breakpoint to wrong place (PC+4).

Since we do support ‘vContSupported’ query command, so let's tell gdb
that
we support it.

Before this change, gdb send below 'Z0' packet to implement single-step:
gdb_handle_packet: Z0,4,4

After this change, gdb send "vCont;s.." which is expected:
gdb_handle_packet: vCont?
put_packet: vCont;c;C;s;S
gdb_handle_packet: vCont;s:p1.1;c:p1.-1


You actually fixed this for all architectures :)

This has been annoying me on MIPS since more than a year...


The problem started with an update of QEMU or of GDB?

At one point it seemed to work, so what happened?


I'd say gdb. I can try different combinations of QEMU/gdb but I won't do 
that soon.





Re: [PATCH v1 1/1] target/riscv: Correctly implement TSR trap

2020-02-20 Thread Alistair Francis
On Mon, Jan 20, 2020 at 9:43 PM Alistair Francis
 wrote:
>
> As reported in: https://bugs.launchpad.net/qemu/+bug/1851939 we weren't
> correctly handling illegal instructions based on the value of MSTATUS_TSR
> and the current privilege level.
>
> This patch fixes the issue raised in the bug by raising an illegal
> instruction if TSR is set and we are in S-Mode.
>
> Signed-off-by: Alistair Francis 

@Palmer Dabbelt  Ping!

Alistair

> ---
>  target/riscv/op_helper.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
> index 331cc36232..eed8eea6f2 100644
> --- a/target/riscv/op_helper.c
> +++ b/target/riscv/op_helper.c
> @@ -83,7 +83,7 @@ target_ulong helper_sret(CPURISCVState *env, target_ulong 
> cpu_pc_deb)
>  }
>
>  if (env->priv_ver >= PRIV_VERSION_1_10_0 &&
> -get_field(env->mstatus, MSTATUS_TSR)) {
> +get_field(env->mstatus, MSTATUS_TSR) && !(env->priv >= PRV_M)) {
>  riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
>  }
>
> --
> 2.24.1
>



Re: [PATCH 3/3] COLO: Optimize memory back-up process

2020-02-20 Thread Dr. David Alan Gilbert
* Hailiang Zhang (zhang.zhanghaili...@huawei.com) wrote:
> This patch will reduce the downtime of VM for the initial process,
> Previously, we copied all these memory in preparing stage of COLO
> while we need to stop VM, which is a time-consuming process.
> Here we optimize it by a trick, back-up every page while in migration
> process while COLO is enabled, though it affects the speed of the
> migration, but it obviously reduce the downtime of back-up all SVM'S
> memory in COLO preparing stage.
> 
> Signed-off-by: Hailiang Zhang 

OK, I think this is right, but it took me quite a while to understand,
I think one of the comments below might not be right:

> ---
>  migration/colo.c |  3 +++
>  migration/ram.c  | 35 +++
>  migration/ram.h  |  1 +
>  3 files changed, 31 insertions(+), 8 deletions(-)
> 
> diff --git a/migration/colo.c b/migration/colo.c
> index d30c6bc4ad..febf010571 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -26,6 +26,7 @@
>  #include "qemu/main-loop.h"
>  #include "qemu/rcu.h"
>  #include "migration/failover.h"
> +#include "migration/ram.h"
>  #ifdef CONFIG_REPLICATION
>  #include "replication.h"
>  #endif
> @@ -906,6 +907,8 @@ void *colo_process_incoming_thread(void *opaque)
>   */
>  qemu_file_set_blocking(mis->from_src_file, true);
>  
> +colo_incoming_start_dirty_log();
> +
>  bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
>  fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
>  object_unref(OBJECT(bioc));
> diff --git a/migration/ram.c b/migration/ram.c
> index ed23ed1c7c..24a8aa3527 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2986,7 +2986,6 @@ int colo_init_ram_cache(void)
>  }
>  return -errno;
>  }
> -memcpy(block->colo_cache, block->host, block->used_length);
>  }
>  }
>  
> @@ -3005,12 +3004,16 @@ int colo_init_ram_cache(void)
>  bitmap_set(block->bmap, 0, pages);
>  }
>  }
> +
> +return 0;
> +}
> +
> +void colo_incoming_start_dirty_log(void)
> +{
>  ram_state = g_new0(RAMState, 1);
>  ram_state->migration_dirty_pages = 0;
>  qemu_mutex_init(&ram_state->bitmap_mutex);
>  memory_global_dirty_log_start();
> -
> -return 0;
>  }
>  
>  /* It is need to hold the global lock to call this helper */
> @@ -3348,7 +3351,7 @@ static int ram_load_precopy(QEMUFile *f)
>  
>  while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
>  ram_addr_t addr, total_ram_bytes;
> -void *host = NULL;
> +void *host = NULL, *host_bak = NULL;
>  uint8_t ch;
>  
>  /*
> @@ -3378,13 +3381,26 @@ static int ram_load_precopy(QEMUFile *f)
>  if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
>   RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
>  RAMBlock *block = ram_block_from_stream(f, flags);
> -
>  /*
> - * After going into COLO, we should load the Page into 
> colo_cache.
> + * After going into COLO, we should load the Page into colo_cache
> + * NOTE: We need to keep a copy of SVM's ram in colo_cache.
> + * Previously, we copied all these memory in preparing stage of 
> COLO
> + * while we need to stop VM, which is a time-consuming process.
> + * Here we optimize it by a trick, back-up every page while in
> + * migration process while COLO is enabled, though it affects the
> + * speed of the migration, but it obviously reduce the downtime 
> of
> + * back-up all SVM'S memory in COLO preparing stage.
>   */
> -if (migration_incoming_in_colo_state()) {
> +if (migration_incoming_colo_enabled()) {
>  host = colo_cache_from_block_offset(block, addr);
> -} else {
> +/*
> + * After going into COLO, load the Page into colo_cache.
> + */
> +if (!migration_incoming_in_colo_state()) {
> +host_bak = host;
> +}
> +}
> +if (!migration_incoming_in_colo_state()) {
>  host = host_from_ram_block_offset(block, addr);

So this works out as quite complicated:
   a) In normal migration we do the last one and just set:
 host = host_from_ram_block_offset(block, addr);
 host_bak = NULL

   b) At the start, when colo_enabled, but !in_colo_state
 host = colo_cache
 host_bak = host
 host = host_from_ram_block_offset

   c) in_colo_state
 host = colo_cache
 host_bak = NULL


(b) is pretty confusing, setting host twice; can't we tidy that up?

Also, that last comment 'After going into COLO' I think is really
  'Before COLO state, copy from ram into cache'

Dave

>  }
>  if (!host) {
> @@ -3506,6 +3522,9 @@ static int ram_load_precopy(QEMUFile *f)
>   

Re: [PATCH 0/3] Optimize VM's downtime while do checkpoint in COLO

2020-02-20 Thread Dr. David Alan Gilbert
* Hailiang Zhang (zhang.zhanghaili...@huawei.com) wrote:
> Hi,
> 
> This is an untested series that tries to reduce VM's pause time
> while do checkpoint in COLO state.
> 
> The second patch tries to reduce the total number of dirty pages
> while do checkpoint with VM been paused, instead of sending all
> dirty pages while VM been pause, it sends part of dirty pages during
> the gap time of two checkpoints when SVM and PVM are running.
> 
> The third patch tries to reduce the pause time of backup ram into
> cache in secondary part.

This is quite nice; I've asked for a clarification on the last one, but
it's only a tidy up.

I guess this really helps continuous-COLO as well, because it means
that restarting to the sync to the new secondary is a lot simpler?

Dave

> 
> Hailiang Zhang (3):
>   migration/colo: wrap incoming checkpoint process into new helper
>   COLO: Migrate dirty pages during the gap of checkpointing
>   COLO: Optimize memory back-up process
> 
>  migration/colo.c   | 332 +
>  migration/migration.h  |   1 +
>  migration/ram.c|  35 -
>  migration/ram.h|   1 +
>  migration/trace-events |   1 +
>  qapi/migration.json|   4 +-
>  6 files changed, 234 insertions(+), 140 deletions(-)
> 
> -- 
> 2.21.0
> 
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: RFC: Split EPT huge pages in advance of dirty logging

2020-02-20 Thread Peter Xu
On Thu, Feb 20, 2020 at 09:34:52AM -0800, Ben Gardon wrote:
> On Thu, Feb 20, 2020 at 5:53 AM Zhoujian (jay)  
> wrote:
> >
> >
> >
> > > -Original Message-
> > > From: Peter Xu [mailto:pet...@redhat.com]
> > > Sent: Thursday, February 20, 2020 1:19 AM
> > > To: Zhoujian (jay) 
> > > Cc: k...@vger.kernel.org; qemu-devel@nongnu.org; pbonz...@redhat.com;
> > > dgilb...@redhat.com; quint...@redhat.com; Liujinsong (Paul)
> > > ; linfeng (M) ; wangxin (U)
> > > ; Huangweidong (C)
> > > 
> > > Subject: Re: RFC: Split EPT huge pages in advance of dirty logging
> > >
> > > On Wed, Feb 19, 2020 at 01:19:08PM +, Zhoujian (jay) wrote:
> > > > Hi Peter,
> > > >
> > > > > -Original Message-
> > > > > From: Peter Xu [mailto:pet...@redhat.com]
> > > > > Sent: Wednesday, February 19, 2020 1:43 AM
> > > > > To: Zhoujian (jay) 
> > > > > Cc: k...@vger.kernel.org; qemu-devel@nongnu.org;
> > > pbonz...@redhat.com;
> > > > > dgilb...@redhat.com; quint...@redhat.com; Liujinsong (Paul)
> > > > > ; linfeng (M) ;
> > > > > wangxin (U) ; Huangweidong (C)
> > > > > 
> > > > > Subject: Re: RFC: Split EPT huge pages in advance of dirty logging
> > > > >
> > > > > On Tue, Feb 18, 2020 at 01:13:47PM +, Zhoujian (jay) wrote:
> > > > > > Hi all,
> > > > > >
> > > > > > We found that the guest will be soft-lockup occasionally when live
> > > > > > migrating a 60 vCPU, 512GiB huge page and memory sensitive VM. The
> > > > > > reason is clear, almost all of the vCPUs are waiting for the KVM
> > > > > > MMU spin-lock to create 4K SPTEs when the huge pages are write
> > > > > > protected. This
> > > > > phenomenon is also described in this patch set:
> > > > > > https://patchwork.kernel.org/cover/11163459/
> > > > > > which aims to handle page faults in parallel more efficiently.
> > > > > >
> > > > > > Our idea is to use the migration thread to touch all of the guest
> > > > > > memory in the granularity of 4K before enabling dirty logging. To
> > > > > > be more specific, we split all the PDPE_LEVEL SPTEs into
> > > > > > DIRECTORY_LEVEL SPTEs as the first step, and then split all the
> > > > > > DIRECTORY_LEVEL SPTEs into
> > > > > PAGE_TABLE_LEVEL SPTEs as the following step.
> > > > >
> > > > > IIUC, QEMU will prefer to use huge pages for all the anonymous
> > > > > ramblocks (please refer to ram_block_add):
> > > > >
> > > > > qemu_madvise(new_block->host, new_block->max_length,
> > > > > QEMU_MADV_HUGEPAGE);
> > > >
> > > > Yes, you're right
> > > >
> > > > >
> > > > > Another alternative I can think of is to add an extra parameter to
> > > > > QEMU to explicitly disable huge pages (so that can even be
> > > > > MADV_NOHUGEPAGE instead of MADV_HUGEPAGE).  However that
> > > should also
> > > > > drag down the performance for the whole lifecycle of the VM.
> > > >
> > > > From the performance point of view, it is better to keep the huge
> > > > pages when the VM is not in the live migration state.
> > > >
> > > > > A 3rd option is to make a QMP
> > > > > command to dynamically turn huge pages on/off for ramblocks globally.
> > > >
> > > > We're searching a dynamic method too.
> > > > We plan to add two new flags for each memory slot, say
> > > > KVM_MEM_FORCE_PT_DIRECTORY_PAGES and
> > > > KVM_MEM_FORCE_PT_PAGE_TABLE_PAGES. These flags can be set through
> > > > KVM_SET_USER_MEMORY_REGION ioctl.

[1]

> > > >
> > > > The mapping_level which is called by tdp_page_fault in the kernel side
> > > > will return PT_DIRECTORY_LEVEL if the
> > > KVM_MEM_FORCE_PT_DIRECTORY_PAGES
> > > > flag of the memory slot is set, and return PT_PAGE_TABLE_LEVEL if the
> > > > KVM_MEM_FORCE_PT_PAGE_TABLE_PAGES flag is set.
> > > >
> > > > The key steps to split the huge pages in advance of enabling dirty log
> > > > is as follows:
> > > > 1. The migration thread in user space uses
> > > KVM_SET_USER_MEMORY_REGION
> > > > ioctl to set the KVM_MEM_FORCE_PT_DIRECTORY_PAGES flag for each
> > > memory
> > > > slot.
> > > > 2. The migration thread continues to use the KVM_SPLIT_HUGE_PAGES
> > > > ioctl (which is newly added) to do the splitting of large pages in the
> > > > kernel side.
> > > > 3. A new vCPU is created temporarily (do some initialization but will
> > > > not
> > > > run) to help to do the work, i.e. as the parameter of the 
> > > > tdp_page_fault.
> > > > 4. Collect the GPA ranges of all the memory slots with the
> > > > KVM_MEM_FORCE_PT_DIRECTORY_PAGES flag set.
> > > > 5. Split the 1G huge pages(collected in step 4) into 2M by calling
> > > > tdp_page_fault, since the mapping_level will return
> > > > PT_DIRECTORY_LEVEL. Here is the main difference from the usual path
> > > > which is caused by the Guest side(EPT violation/misconfig etc), we
> > > > call it directly in the hypervisor side.
> > > > 6. Do some cleanups, i.e. free the vCPU related resources 7. The
> > > > KVM_SPLIT_HUGE_PAGES ioctl returned to the user space side.
> > > > 8. Using KVM_MEM_FORCE_PT_PAGE_TABLE_PAGES instead of
> > > > 

Re: [PATCH] tcg: gdbstub: Fix single-step issue on arm target

2020-02-20 Thread Laurent Vivier
Le 20/02/2020 à 18:47, Philippe Mathieu-Daudé a écrit :
> On 2/20/20 4:58 PM, Changbin Du wrote:
>> Recently when debugging an arm32 system on qemu, I found sometimes the
>> single-step command (stepi) is not working. This can be reproduced by
>> below steps:
>>   1) start qemu-system-arm -s -S .. and wait for gdb connection.
>>   2) start gdb and connect to qemu. In my case, gdb gets a wrong value
>>  (0x60) for PC.
>>   3) After connected, type 'stepi' and expect it will stop at next ins.
>>
>> But, it has never stopped. This because:
>>   1) We doesn't report ‘vContSupported’ feature to gdb explicitly and gdb
>>  think we do not support it. In this case, gdb use a software
>> breakpoint
>>  to emulate single-step.
>>   2) Since gdb gets a wrong initial value of PC, then gdb inserts a
>>  breakpoint to wrong place (PC+4).
>>
>> Since we do support ‘vContSupported’ query command, so let's tell gdb
>> that
>> we support it.
>>
>> Before this change, gdb send below 'Z0' packet to implement single-step:
>> gdb_handle_packet: Z0,4,4
>>
>> After this change, gdb send "vCont;s.." which is expected:
>> gdb_handle_packet: vCont?
>> put_packet: vCont;c;C;s;S
>> gdb_handle_packet: vCont;s:p1.1;c:p1.-1
> 
> You actually fixed this for all architectures :)
> 
> This has been annoying me on MIPS since more than a year...

The problem started with an update of QEMU or of GDB?

At one point it seemed to work, so what happened?

Thanks,
Laurent



Re: [PATCH] tcg: gdbstub: Fix single-step issue on arm target

2020-02-20 Thread Peter Maydell
On Thu, 20 Feb 2020 at 15:59, Changbin Du  wrote:
>
> Recently when debugging an arm32 system on qemu, I found sometimes the
> single-step command (stepi) is not working. This can be reproduced by
> below steps:
>  1) start qemu-system-arm -s -S .. and wait for gdb connection.
>  2) start gdb and connect to qemu. In my case, gdb gets a wrong value
> (0x60) for PC.
>  3) After connected, type 'stepi' and expect it will stop at next ins.
>
> But, it has never stopped. This because:
>  1) We doesn't report ‘vContSupported’ feature to gdb explicitly and gdb
> think we do not support it. In this case, gdb use a software breakpoint
> to emulate single-step.
>  2) Since gdb gets a wrong initial value of PC, then gdb inserts a
> breakpoint to wrong place (PC+4).
>
> Since we do support ‘vContSupported’ query command, so let's tell gdb that
> we support it.
>
> Before this change, gdb send below 'Z0' packet to implement single-step:
> gdb_handle_packet: Z0,4,4
>
> After this change, gdb send "vCont;s.." which is expected:
> gdb_handle_packet: vCont?
> put_packet: vCont;c;C;s;S
> gdb_handle_packet: vCont;s:p1.1;c:p1.-1
>
> Signed-off-by: Changbin Du 

Certainly if we support vCont we should advertise it. But why
does the fallback path not work? That is, why does gdb get a
wrong PC value initially?

thanks
-- PMM



Re: [PATCH v2 2/2] hw: move timer_new from init() into realize() to avoid memleaks

2020-02-20 Thread Peter Maydell
On Mon, 17 Feb 2020 at 03:22,  wrote:
>
> From: Pan Nengyuan 
>
> There are some memleaks when we call 'device_list_properties'. This patch 
> move timer_new from init into realize to fix it.
> Meanwhile, do the null check in mos6522_reset() to avoid null deref if we 
> move timer_new into realize().
>
> Reported-by: Euler Robot 
> Signed-off-by: Pan Nengyuan 
> Reviewed-by: Philippe Mathieu-Daudé 


> diff --git a/hw/misc/mos6522.c b/hw/misc/mos6522.c
> index 19e154b870..980eda7599 100644
> --- a/hw/misc/mos6522.c
> +++ b/hw/misc/mos6522.c
> @@ -465,11 +465,15 @@ static void mos6522_reset(DeviceState *dev)
>  s->timers[0].frequency = s->frequency;
>  s->timers[0].latch = 0xffff;
>  set_counter(s, &s->timers[0], 0xffff);
> -timer_del(s->timers[0].timer);
> +if (s->timers[0].timer) {
> +timer_del(s->timers[0].timer);
> +}
>
>  s->timers[1].frequency = s->frequency;
>  s->timers[1].latch = 0xffff;
> -timer_del(s->timers[1].timer);
> +if (s->timers[1].timer) {
> +timer_del(s->timers[1].timer);
> +}
>  }

What code path calls a device 'reset' method on a device
that has not yet been realized ? I wasn't expecting that
to be valid...

thanks
-- PMM



Re: [PATCH 11/19] target/arm: Replace ARM_FEATURE_VFP4 with isar_feature_aa32_simdfmac

2020-02-20 Thread Richard Henderson
On 2/20/20 8:37 AM, Peter Maydell wrote:
> This is tricky, because the SIMDFMAC register
> field indicates "do we have fused-multiply-accumulate
> for either VFP or Neon", so in a VFP-no-Neon core or
> a Neon-no-VFP core it will be 1 but can't be used on its
> own as a gate on "should this insn be present".
> 
> Currently in the part of arm_cpu_realize() which handles
> the user having selected vfp=off and/or neon=off we
> do allow (for AArch32 cores) both of those combinations.
> 
> trans_VFM_dp already tests aa32_fpdp_v2, so I think the
> main thing we need to do is add a test on aa32_fpsp_v2 to
> trans_VFM_sp.
> 
> We clear the SIMDFMAC field to 0 in the !has_neon condition,
> and I think that should actually be in the !neon && !vfp part.
> 
> I propose to squash in the following and beef up the commit message:

Good catch.  Makes sense.


r~



Re: [PATCH 00/19] target/arm: vfp feature and decodetree cleanup

2020-02-20 Thread Peter Maydell
On Fri, 14 Feb 2020 at 18:15, Richard Henderson
 wrote:
>
> The main goal of the patchset is to move the ARM_FEATURE_VFP
> test from outside of the disas_vfp_insn() to inside each of
> the trans_* functions, so that we get the proper ISA check
> for each case.
>
> At the end of that, it is easy to eliminate all of the remaining
> tests vs ARM_FEATURE_VFP* in favor of the preferred ISAR tests.
>
> Finally, there are a couple of cleanups to vfp.decode to make
> things a bit more legible.
>
>
> r~
>
>
> Richard Henderson (19):
>   target/arm: Fix field extract from MVFR[0-2]
>   target/arm: Rename isar_feature_aa32_simd_r32
>   target/arm: Use isar_feature_aa32_simd_r32 more places
>   target/arm: Set MVFR0.FPSP for ARMv5 cpus
>   target/arm: Add isar_feature_aa32_simd_r16
>   target/arm: Rename isar_feature_aa32_fpdp_v2
>   target/arm: Add isar_feature_aa32_{fpsp_v2,fpsp_v3,fpdp_v3}
>   target/arm: Perform fpdp_v2 check first
>   target/arm: Replace ARM_FEATURE_VFP3 checks with fp{sp,dp}_v3
>   target/arm: Add missing checks for fpsp_v2
>   target/arm: Replace ARM_FEATURE_VFP4 with isar_feature_aa32_simdfmac
>   target/arm: Remove ARM_FEATURE_VFP check from disas_vfp_insn
>   target/arm: Move VLLDM and VLSTM to vfp.decode
>   target/arm: Move the vfp decodetree calls next to the base isa
>   linux-user/arm: Replace ARM_FEATURE_VFP* tests for HWCAP
>   target/arm: Remove ARM_FEATURE_VFP*
>   target/arm: Add formats for some vfp 2 and 3-register insns
>   target/arm: Split VFM decode
>   target/arm: Split VMINMAXNM decode

Hi; patch 1 here already has a version in my PMU patchset.
I've applied patches 2-10 to target-arm.next, with one or
two minor fixups for things like the checkpatch long-line
warning and a typo here or there in commit message or comment
(expect a pullreq either today or tomorrow). Patches 11-19
I've sent reviewed-by tags or comments on.

thanks
-- PMM



Re: [PATCH] tcg: gdbstub: Fix single-step issue on arm target

2020-02-20 Thread Philippe Mathieu-Daudé

On 2/20/20 4:58 PM, Changbin Du wrote:

Recently when debugging an arm32 system on qemu, I found sometimes the
single-step command (stepi) is not working. This can be reproduced by
below steps:
  1) start qemu-system-arm -s -S .. and wait for gdb connection.
  2) start gdb and connect to qemu. In my case, gdb gets a wrong value
 (0x60) for PC.
  3) After connected, type 'stepi' and expect it will stop at next ins.

But, it has never stopped. This because:
  1) We doesn't report ‘vContSupported’ feature to gdb explicitly and gdb
 think we do not support it. In this case, gdb use a software breakpoint
 to emulate single-step.
  2) Since gdb gets a wrong initial value of PC, then gdb inserts a
 breakpoint to wrong place (PC+4).

Since we do support ‘vContSupported’ query command, so let's tell gdb that
we support it.

Before this change, gdb send below 'Z0' packet to implement single-step:
gdb_handle_packet: Z0,4,4

After this change, gdb send "vCont;s.." which is expected:
gdb_handle_packet: vCont?
put_packet: vCont;c;C;s;S
gdb_handle_packet: vCont;s:p1.1;c:p1.-1


You actually fixed this for all architectures :)

This has been annoying me on MIPS since more than a year...

I haven't checked the GDB protocol spec, but so far:
Tested-by: Philippe Mathieu-Daudé 



Signed-off-by: Changbin Du 
---
  gdbstub.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gdbstub.c b/gdbstub.c
index ce304ff482..adccd938e2 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -2111,7 +2111,7 @@ static void handle_query_supported(GdbCmdContext 
*gdb_ctx, void *user_ctx)
  gdb_ctx->s->multiprocess = true;
  }
  
-pstrcat(gdb_ctx->str_buf, sizeof(gdb_ctx->str_buf), ";multiprocess+");

+pstrcat(gdb_ctx->str_buf, sizeof(gdb_ctx->str_buf), 
";vContSupported+;multiprocess+");
  put_packet(gdb_ctx->s, gdb_ctx->str_buf);
  }
  






Re: [PATCH 19/19] target/arm: Split VMINMAXNM decode

2020-02-20 Thread Peter Maydell
On Fri, 14 Feb 2020 at 18:16, Richard Henderson
 wrote:
>
> Passing the raw op field from the manual is less instructive
> than it might be.  Do the full decode and use the existing
> helpers to perform the expansion.
>
> Since these are v8 insns, VECLEN+VECSTRIDE are already RES0.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH 18/19] target/arm: Split VFM decode

2020-02-20 Thread Peter Maydell
On Fri, 14 Feb 2020 at 18:16, Richard Henderson
 wrote:
>
> Passing the raw o1 and o2 fields from the manual is less
> instructive than it might be.  Do the full decode and let
> the trans_* functions pass in booleans to a helper.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/arm/vfp.decode  | 17 +--
>  target/arm/translate-vfp.inc.c | 52 ++
>  2 files changed, 55 insertions(+), 14 deletions(-)
>

Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH 17/19] target/arm: Add formats for some vfp 2 and 3-register insns

2020-02-20 Thread Peter Maydell
On Fri, 14 Feb 2020 at 18:16, Richard Henderson
 wrote:
>
> Those vfp instructions without extra opcode fields can
> share a common @format for brevity.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/arm/vfp.decode | 134 --
>  1 file changed, 52 insertions(+), 82 deletions(-)
>
> diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
> index 592fe9e1e4..4f294f88be 100644
> --- a/target/arm/vfp.decode
> +++ b/target/arm/vfp.decode
> @@ -46,6 +46,14 @@
>
>  %vmov_imm 16:4 0:4
>
> +@vfp_dnm_s    vm=%vm_sp vn=%vn_sp vd=%vd_sp
> +@vfp_dnm_d    vm=%vm_dp vn=%vn_dp vd=%vd_dp
> +
> +@vfp_dm_ss    vm=%vm_sp vd=%vd_sp
> +@vfp_dm_dd    vm=%vm_dp vd=%vd_dp
> +@vfp_dm_ds    vm=%vm_sp vd=%vd_dp
> +@vfp_dm_sd    vm=%vm_dp vd=%vd_sp

I'm less convinced about the ds and sd ones because there aren't
very many uses of them, and now you have to go back from the
insn line to the format line to check which way round the single
and the double are if you want to confirm that the decode is right.

But anyway
Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PULL 00/17] Trivial branch patches

2020-02-20 Thread Peter Maydell
On Wed, 19 Feb 2020 at 10:16, Laurent Vivier  wrote:
>
> The following changes since commit 6c599282f8ab382fe59f03a6cae755b89561a7b3:
>
>   Merge remote-tracking branch 
> 'remotes/armbru/tags/pull-monitor-2020-02-15-v2' into staging (2020-02-17 
> 13:32:25 +)
>
> are available in the Git repository at:
>
>   git://github.com/vivier/qemu.git tags/trivial-branch-pull-request
>
> for you to fetch changes up to d1cb67841ca213802ee789957188ec87e8b7996d:
>
>   hw/xtensa/xtfpga:fix leak of fdevice tree blob (2020-02-19 10:33:38 +0100)
>
> 
> Fix memory leak with fdt
> cosmetic change in code and logs
> update mailmap
>


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/5.0
for any user-visible changes.

-- PMM



Re: [PATCH 16/19] target/arm: Remove ARM_FEATURE_VFP*

2020-02-20 Thread Peter Maydell
On Fri, 14 Feb 2020 at 18:16, Richard Henderson
 wrote:
>
> We have converted all tests against these features
> to ISAR tests.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH 1/2] riscv: roms: Add 32-bit OpenSBI firmware image for sifive_u

2020-02-20 Thread Philippe Mathieu-Daudé

Hi Bin,

On 2/20/20 3:42 PM, Bin Meng wrote:

Although the real world SiFive HiFive Unleashed board is a 64-bit
hardware configuration, with QEMU it is possible to test 32-bit
configuration with the same hardware features.

This updates the roms Makefile to add the build rules for creating
the 32-bit OpenSBI firmware image for sifive_u machine. A pre-built
OpenSBI image (built from commit 3e7d666) has been added as the
default bios for 32-bit sifive_u machine.


With QEMU:

fatal: ambiguous argument '3e7d666': unknown revision or path not in the 
working tree.


This looks like an OpenSBI commit but QEMU only includes up to v0.5.

Can you build v0.5? Else can you update the submodule?

Also, can you add a CI job to build this, so we have reproducible builds 
(see QEMU commit 71920809ceabed as example)?


Thanks,

Phil.



Signed-off-by: Bin Meng 
---

  Makefile |   2 +-
  pc-bios/opensbi-riscv32-sifive_u-fw_jump.bin | Bin 0 -> 49472 bytes
  roms/Makefile|   7 +++
  3 files changed, 8 insertions(+), 1 deletion(-)
  create mode 100644 pc-bios/opensbi-riscv32-sifive_u-fw_jump.bin

diff --git a/Makefile b/Makefile
index b5a7377..ee7aa6d 100644
--- a/Makefile
+++ b/Makefile
@@ -804,7 +804,7 @@ u-boot.e500 u-boot-sam460-20100605.bin \
  qemu_vga.ndrv \
  edk2-licenses.txt \
  hppa-firmware.img \
-opensbi-riscv32-virt-fw_jump.bin \
+opensbi-riscv32-sifive_u-fw_jump.bin opensbi-riscv32-virt-fw_jump.bin \
  opensbi-riscv64-sifive_u-fw_jump.bin opensbi-riscv64-virt-fw_jump.bin
  
  
diff --git a/pc-bios/opensbi-riscv32-sifive_u-fw_jump.bin b/pc-bios/opensbi-riscv32-sifive_u-fw_jump.bin

new file mode 100644
index 
..93e2556baa261bfd796cdc484b345bf958c8afea
GIT binary patch
literal 49472
zcmce<3tUrIwm-g4PEJmO55TBst1S`%p{+7bwmfJ_Q5GSc5WXIb-Yf;
zTQE7qfCw5frPUeg1C*IsYpP7Wf8`k>S|7E7)v4ARk!l~_uc0Z3D#==_x}EW
z{e+X8v-e(Wuf6u#Yp=c5I(j8X9A=K_H7YVHRIkQ2PDvc1lIT?$(*9dPIfxO4a@rs@
zAzHOMUYz>$2;Awf?q^pJVq29O7sTQT>%OfkN+n(QVoh5Vr0aeRt7C~BWo)Lc(Wb2|
z(f9pL^kaV>ZW?~A)@NA`29{jcbae-+xNA}g5kf2@SE4jgq`x8
zwg2wDGwS`WJFP+fDC_Uv*Zbt?F2V3NYQ0Vxiv?#jBYf^=T6eL#SfVScV%90vvF^jE
z9jn#~|El6-oZRCC=WmGcc{Q_emwbo4dndWL>&^`sP9EC*f71%h)qW9y;QxU$S
z>d4X%J?}??%0NBd^>3ypc#eJ2$OT=*Z$<1^4oeO@Wxc+yf{1ZUoLYl`R2_ws+x{<~tiBVTpOVkc$dmIy`1`$EM#1

Re: [PATCH 15/19] linux-user/arm: Replace ARM_FEATURE_VFP* tests for HWCAP

2020-02-20 Thread Peter Maydell
On Fri, 14 Feb 2020 at 18:16, Richard Henderson
 wrote:
>
> Use isar feature tests instead of feature bit tests.
>
> Although none of QEMUs current cpus have VFPv3 without D32,
> replace the large comment explaining why with one line that
> sets ARM_HWCAP_ARM_VFPv3D16 under the correct conditions.
> Mirror the test sequence used in the linux kernel.
>
> Signed-off-by: Richard Henderson 
> ---
>  linux-user/elfload.c | 24 +++-
>  1 file changed, 15 insertions(+), 9 deletions(-)
>
> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> index f3080a1635..c52c814a2e 100644
> --- a/linux-user/elfload.c
> +++ b/linux-user/elfload.c
> @@ -468,22 +468,28 @@ static uint32_t get_elf_hwcap(void)
>
>  /* EDSP is in v5TE and above, but all our v5 CPUs are v5TE */
>  GET_FEATURE(ARM_FEATURE_V5, ARM_HWCAP_ARM_EDSP);
> -GET_FEATURE(ARM_FEATURE_VFP, ARM_HWCAP_ARM_VFP);
>  GET_FEATURE(ARM_FEATURE_IWMMXT, ARM_HWCAP_ARM_IWMMXT);
>  GET_FEATURE(ARM_FEATURE_THUMB2EE, ARM_HWCAP_ARM_THUMBEE);
>  GET_FEATURE(ARM_FEATURE_NEON, ARM_HWCAP_ARM_NEON);
> -GET_FEATURE(ARM_FEATURE_VFP3, ARM_HWCAP_ARM_VFPv3);
>  GET_FEATURE(ARM_FEATURE_V6K, ARM_HWCAP_ARM_TLS);
> -GET_FEATURE(ARM_FEATURE_VFP4, ARM_HWCAP_ARM_VFPv4);
> +GET_FEATURE(ARM_FEATURE_LPAE, ARM_HWCAP_ARM_LPAE);
> +
>  GET_FEATURE_ID(arm_div, ARM_HWCAP_ARM_IDIVA);
>  GET_FEATURE_ID(thumb_div, ARM_HWCAP_ARM_IDIVT);
> -/* All QEMU's VFPv3 CPUs have 32 registers, see VFP_DREG in translate.c.
> - * Note that the ARM_HWCAP_ARM_VFPv3D16 bit is always the inverse of
> - * ARM_HWCAP_ARM_VFPD32 (and so always clear for QEMU); it is unrelated
> - * to our VFP_FP16 feature bit.
> +/*
> + * Note that none of QEMU's cpus have double precision without single
> + * precision support in VFP, so only test the single precision field.
>   */

Why not actually mirror the kernel's test sequence, rather
than having a comment about how we deviate from it ?

> -GET_FEATURE(ARM_FEATURE_VFP3, ARM_HWCAP_ARM_VFPD32);
> -GET_FEATURE(ARM_FEATURE_LPAE, ARM_HWCAP_ARM_LPAE);
> +GET_FEATURE_ID(aa32_fpsp_v2, ARM_HWCAP_ARM_VFP);
> +if (cpu_isar_feature(aa32_fpsp_v3, cpu)) {
> +hwcaps |= ARM_HWCAP_ARM_VFPv3;
> +if (cpu_isar_feature(aa32_simd_r32, cpu)) {
> +hwcaps |= ARM_HWCAP_ARM_VFPD32;
> +} else {
> +hwcaps |= ARM_HWCAP_ARM_VFPv3D16;
> +}
> +}
> +GET_FEATURE_ID(aa32_simdfmac, ARM_HWCAP_ARM_VFPv4);
>
>  return hwcaps;
>  }

thanks
-- PMM



RE: [EXTERNAL] Re: [PATCH RESEND 11/13] target/i386/whpx: Remove superfluous semicolon

2020-02-20 Thread Justin Terry (SF)
+Sunil Muthuswamy 

LGTM. Thanks!

Reviewed-by: Justin Terry (VM) 

> -Original Message-
> From: Dr. David Alan Gilbert 
> Sent: Tuesday, February 18, 2020 2:00 AM
> To: Philippe Mathieu-Daudé 
> Cc: qemu-devel@nongnu.org; Max Reitz ; Kevin Wolf
> ; Thomas Huth ; Fam Zheng
> ; Eduardo Habkost ; Alex
> Williamson ; Marcel Apfelbaum
> ; Richard Henderson ;
> Julia Suvorova ; Thomas Huth ;
> Edgar E. Iglesias ; Aarushi Mehta
> ; qemu-triv...@nongnu.org; Stefan Hajnoczi
> ; Alistair Francis ; Gerd
> Hoffmann ; Luc Michel ;
> Laurent Vivier ; Juan Quintela
> ; Michael Tokarev ; Laurent Vivier
> ; Paolo Bonzini ; Yuval Shaia
> ; qemu-...@nongnu.org; Peter Maydell
> ; qemu-bl...@nongnu.org; Justin Terry (SF)
> 
> Subject: [EXTERNAL] Re: [PATCH RESEND 11/13] target/i386/whpx: Remove
> superfluous semicolon
> 
> * Philippe Mathieu-Daudé (phi...@redhat.com) wrote:
> > Fixes: 812d49f2a3e
> > Signed-off-by: Philippe Mathieu-Daudé 
> 
> Reviewed-by: Dr. David Alan Gilbert 
> 
> > ---
> > Cc: Justin Terry (VM) 
> > ---
> >  target/i386/whpx-all.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c index
> > 3ed2aa1892..35601b8176 100644
> > --- a/target/i386/whpx-all.c
> > +++ b/target/i386/whpx-all.c
> > @@ -511,7 +511,7 @@ static void whpx_get_registers(CPUState *cpu)
> >  /* WHvX64RegisterPat - Skipped */
> >
> >  assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
> > -env->sysenter_cs = vcxt.values[idx++].Reg64;;
> > +env->sysenter_cs = vcxt.values[idx++].Reg64;
> >  assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
> >  env->sysenter_eip = vcxt.values[idx++].Reg64;
> >  assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
> > --
> > 2.21.1
> >
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [PATCH v1 2/4] Acceptance test: EXEC migration

2020-02-20 Thread Wainer dos Santos Moschetta



On 2/14/20 12:52 PM, Oksana Vohchana wrote:

Improves EXEC migration to run whole test stage

Signed-off-by: Oksana Vohchana 
---
  tests/acceptance/migration.py | 2 ++
  1 file changed, 2 insertions(+)


Indeed, with this change the migration is triggered.

Tested-by: Wainer dos Santos Moschetta 



diff --git a/tests/acceptance/migration.py b/tests/acceptance/migration.py
index e4c39b85a1..8209dcf71d 100644
--- a/tests/acceptance/migration.py
+++ b/tests/acceptance/migration.py
@@ -75,3 +75,5 @@ class Migration(Test):
  """
  free_port = self._get_free_port()
  dest_uri = 'exec:nc -l localhost %u' % free_port
+src_uri = 'exec:nc localhost %u' % free_port
+self.do_migrate(dest_uri, src_uri)





Re: [RFC PATCH v3 11/27] qcow2: Add QCow2SubclusterType and qcow2_get_subcluster_type()

2020-02-20 Thread Max Reitz
On 22.12.19 12:36, Alberto Garcia wrote:
> This patch adds QCow2SubclusterType, which is the subcluster-level
> version of QCow2ClusterType. All QCOW2_SUBCLUSTER_* values have the
> same meaning as their QCOW2_CLUSTER_* equivalents (when they
> exist). See below for details and caveats.
> 
> In images without extended L2 entries clusters are treated as having
> exactly one subcluster so it is possible to replace one data type with
> the other while keeping the exact same semantics.
> 
> With extended L2 entries there are new possible values, and every
> subcluster in the same cluster can obviously have a different
> QCow2SubclusterType so functions need to be adapted to work on the
> subcluster level.
> 
> There are several things that have to be taken into account:
> 
>   a) QCOW2_SUBCLUSTER_COMPRESSED means that the whole cluster is
>  compressed. We do not support compression at the subcluster
>  level.
> 
>   b) There are two different values for unallocated subclusters:
>  QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN which means that the whole
>  cluster is unallocated, and QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC
>  which means that the cluster is allocated but the subcluster is
>  not. The latter can only happen in images with extended L2
>  entries.
> 
>   c) QCOW2_SUBCLUSTER_INVALID is used to detect the cases where an L2
>  entry has a value that violates the specification. The caller is
>  responsible for handling these situations.
> 
>  To prevent compatibility problems with images that have invalid
>  values but are currently being read by QEMU without causing side
>  effects, QCOW2_SUBCLUSTER_INVALID is only returned for images
>  with extended L2 entries.
> 
> qcow2_cluster_to_subcluster_type() is added as a separate function
> from qcow2_get_subcluster_type(), but this is only temporary and both
> will be merged in a subsequent patch.
> 
> Signed-off-by: Alberto Garcia 
> ---
>  block/qcow2.h | 92 +++
>  1 file changed, 92 insertions(+)

With the comment style fixed as now required by the coding style (/* and
*/ on separate lines), and regardless of the bit ordering:

Reviewed-by: Max Reitz 



signature.asc
Description: OpenPGP digital signature


Re: [PATCH 12/19] target/arm: Remove ARM_FEATURE_VFP check from disas_vfp_insn

2020-02-20 Thread Peter Maydell
On Fri, 14 Feb 2020 at 18:16, Richard Henderson
 wrote:
>
> We now have proper ISA checks within each trans_* function.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/arm/translate.c | 4 
>  1 file changed, 4 deletions(-)
>
> diff --git a/target/arm/translate.c b/target/arm/translate.c
> index 0da780102c..e8c3d4f26f 100644
> --- a/target/arm/translate.c
> +++ b/target/arm/translate.c
> @@ -2652,10 +2652,6 @@ static void gen_neon_dup_high16(TCGv_i32 var)
>   */
>  static int disas_vfp_insn(DisasContext *s, uint32_t insn)
>  {
> -if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
> -return 1;
> -}
> -
>  /*
>   * If the decodetree decoder handles this insn it will always
>   * emit code to either execute the insn or generate an appropriate
> --
> 2.20.1

Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH 14/19] target/arm: Move the vfp decodetree calls next to the base isa

2020-02-20 Thread Peter Maydell
On Fri, 14 Feb 2020 at 18:16, Richard Henderson
 wrote:
>
> Have the calls adjacent as an intermediate step toward
> actually merging the decodes.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/arm/translate.c | 80 +-
>  1 file changed, 24 insertions(+), 56 deletions(-)
>
> diff --git a/target/arm/translate.c b/target/arm/translate.c
> index b2641b4262..5cabe6b2e9 100644
> --- a/target/arm/translate.c
> +++ b/target/arm/translate.c
> @@ -2646,31 +2646,6 @@ static void gen_neon_dup_high16(TCGv_i32 var)
>  tcg_temp_free_i32(tmp);
>  }
>
> -/*
> - * Disassemble a VFP instruction.  Returns nonzero if an error occurred
> - * (ie. an undefined instruction).
> - */
> -static int disas_vfp_insn(DisasContext *s, uint32_t insn)
> -{
> -/*
> - * If the decodetree decoder handles this insn it will always
> - * emit code to either execute the insn or generate an appropriate
> - * exception; so we don't need to ever return non-zero to tell
> - * the calling code to emit an UNDEF exception.
> - */
> -if (extract32(insn, 28, 4) == 0xf) {
> -if (disas_vfp_uncond(s, insn)) {
> -return 0;
> -}
> -} else {
> -if (disas_vfp(s, insn)) {
> -return 0;
> -}
> -}
> -/* If the decodetree decoder didn't handle this insn, it must be UNDEF */
> -return 1;
> -}

Before this change, if this was a cp10/11 insn and
neither disas_vfp_uncond() nor disas_vfp() returned true,
we would UNDEF it.

> -
>  static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
>  {
>  #ifndef CONFIG_USER_ONLY
> @@ -10524,7 +10499,9 @@ static void disas_arm_insn(DisasContext *s, unsigned 
> int insn)
>  ARCH(5);
>
>  /* Unconditional instructions.  */
> -if (disas_a32_uncond(s, insn)) {
> +/* TODO: Perhaps merge these into one decodetree output file.  */
> +if (disas_a32_uncond(s, insn) ||
> +disas_vfp_uncond(s, insn)) {
>  return;
>  }
>  /* fall back to legacy decoder */
> @@ -10551,13 +10528,6 @@ static void disas_arm_insn(DisasContext *s, unsigned 
> int insn)
>  }
>  return;
>  }
> -if ((insn & 0x0f000e10) == 0x0e000a00) {
> -/* VFP.  */
> -if (disas_vfp_insn(s, insn)) {
> -goto illegal_op;
> -}
> -return;
> -}
>  if ((insn & 0x0e000f00) == 0x0c000100) {
>  if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
>  /* iWMMXt register transfer.  */
> @@ -10588,7 +10558,9 @@ static void disas_arm_insn(DisasContext *s, unsigned 
> int insn)
>  arm_skip_unless(s, cond);
>  }
>
> -if (disas_a32(s, insn)) {
> +/* TODO: Perhaps merge these into one decodetree output file.  */
> +if (disas_a32(s, insn) ||
> +disas_vfp(s, insn)) {
>  return;
>  }
>  /* fall back to legacy decoder */
> @@ -10597,12 +10569,7 @@ static void disas_arm_insn(DisasContext *s, unsigned 
> int insn)
>  case 0xc:
>  case 0xd:
>  case 0xe:
> -if (((insn >> 8) & 0xe) == 10) {
> -/* VFP.  */
> -if (disas_vfp_insn(s, insn)) {
> -goto illegal_op;
> -}
> -} else if (disas_coproc_insn(s, insn)) {
> +if (((insn >> 8) & 0xe) != 10 && disas_coproc_insn(s, insn)) {
>  /* Coprocessor.  */
>  goto illegal_op;
>  }

But now if the VFP decodetree doesn't handle the insn,
we'll fall into this case here, I think, the
"(((insn >> 8) & 0xe) != 10" part of the condition will
be false, and we'll end up at a 'break' statement, which
I think means we'll end up doing a NOP rather than an UNDEF.



> @@ -10691,7 +10658,14 @@ static void disas_thumb2_insn(DisasContext *s, 
> uint32_t insn)
>  ARCH(6T2);
>  }
>
> -if (disas_t32(s, insn)) {
> +/*
> + * TODO: Perhaps merge these into one decodetree output file.
> + * Note disas_vfp is written for a32 with cond field in the
> + * top nibble.  The t32 encoding requires 0xe in the top nibble.
> + */
> +if (disas_t32(s, insn) ||
> +disas_vfp_uncond(s, insn) ||
> +((insn >> 28) == 0xe && disas_vfp(s, insn))) {
>  return;
>  }
>  /* fall back to legacy decoder */
> @@ -10708,17 +10682,15 @@ static void disas_thumb2_insn(DisasContext *s, 
> uint32_t insn)
>  goto illegal_op; /* op0 = 0b11 : unallocated */
>  }
>
> -if (disas_vfp_insn(s, insn)) {
> -if (((insn >> 8) & 0xe) == 10 &&
> -dc_isar_feature(aa32_fpsp_v2, s)) {
> -/* FP, and the CPU supports it */
> -goto illegal_op;
> -} else {
> -/* All other insns: NOCP */
> -gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
> - 

Re: [PATCH v4 00/11] RFC: [for 5.0]: HMP monitor handlers refactoring

2020-02-20 Thread Maxim Levitsky
On Fri, 2020-02-07 at 18:28 +, Dr. David Alan Gilbert wrote:
> * Maxim Levitsky (mlevi...@redhat.com) wrote:
> > On Mon, 2020-02-03 at 19:57 +, Dr. David Alan Gilbert wrote:
> > > * Maxim Levitsky (mlevi...@redhat.com) wrote:
> > > > > This patch series is a bunch of cleanups to the hmp monitor code.
> > > > It mostly moves the blockdev related hmp handlers to its own file,
> > > > and does some minor refactoring.
> > > > 
> > > > No functional changes expected.
> > > 
> > > You've still got the title marked as RFC - are you actually ready for
> > > this log?
> > 
> > I forgot to update this to be honest, I don't consider this an RFC,
> > especially since I dropped for now the patches that might cause
> > issues. This is now just a nice refactoring.
> 
> OK, so if we can get some block people to say they're happy, then
> I'd be happy to take this through HMP or they can take it through block.


Any update?

Best regards,
Maxim Levitsky

> 
> Dave
> 
> > Best regards,
> > Maxim Levitsky
> > 
> > > 
> > > Dave
> > > 
> > > > 
> > > > Changes from V1:
> > > >* move the handlers to block/monitor/block-hmp-cmds.c
> > > >* tiny cleanup for the commit messages
> > > > 
> > > > Changes from V2:
> > > >* Moved all the function prototypes to new header 
> > > > (blockdev-hmp-cmds.h)
> > > >* Set the license of blockdev-hmp-cmds.c to GPLv2+
> > > >* Moved hmp_snapshot_* functions to blockdev-hmp-cmds.c
> > > >* Moved hmp_drive_add_node to blockdev-hmp-cmds.c
> > > >  (this change needed some new exports, thus in separate new patch)
> > > >* Moved hmp_qemu_io and hmp_eject to blockdev-hmp-cmds.c
> > > >* Added 'error:' prefix to vreport, and updated the iotests
> > > >  This is invasive change, but really feels like the right one
> > > >* Added minor refactoring patch that drops an unused #include
> > > > 
> > > > Changes from V3:
> > > >* Dropped the error prefix patches for now due to fact that it seems
> > > >  that libvirt doesn't need that after all. Oh well...
> > > >  I'll send them in a separate series.
> > > > 
> > > >* Hopefully correctly merged the copyright info the new files
> > > >  Both files are GPLv2 now (due to code from hmp.h/hmp-cmds.c)
> > > > 
> > > >* Addressed review feedback
> > > >* Renamed the added header to block-hmp-cmds.h
> > > > 
> > > >* Got rid of checkpatch.pl warnings in the moved code
> > > >  (cosmetic code changes only)
> > > > 
> > > >* I kept the reviewed-by tags, since the changes I did are minor.
> > > >  I hope that this is right thing to do.
> > > > 
> > > > Best regards,
> > > > Maxim Levitsky
> > > > 
> > > > Maxim Levitsky (11):
> > > >   usb/dev-storage: remove unused include
> > > >   monitor/hmp: uninline add_init_drive
> > > >   monitor/hmp: rename device-hotplug.c to block/monitor/block-hmp-cmds.c
> > > >   monitor/hmp: move hmp_drive_del and hmp_commit to block-hmp-cmds.c
> > > >   monitor/hmp: move hmp_drive_mirror and hmp_drive_backup to
> > > > block-hmp-cmds.c Moved code was added after 2012-01-13, thus under
> > > > GPLv2+
> > > >   monitor/hmp: move hmp_block_job* to block-hmp-cmds.c
> > > >   monitor/hmp: move hmp_snapshot_* to block-hmp-cmds.c
> > > > hmp_snapshot_blkdev is from GPLv2 version of the hmp-cmds.c thus
> > > > have to change the licence to GPLv2
> > > >   monitor/hmp: move hmp_nbd_server* to block-hmp-cmds.c
> > > >   monitor/hmp: move remaining hmp_block* functions to block-hmp-cmds.c
> > > >   monitor/hmp: move hmp_info_block* to block-hmp-cmds.c
> > > >   monitor/hmp: Move hmp_drive_add_node to block-hmp-cmds.c
> > > > 
> > > >  MAINTAINERS|1 +
> > > >  Makefile.objs  |2 +-
> > > >  block/Makefile.objs|1 +
> > > >  block/monitor/Makefile.objs|1 +
> > > >  block/monitor/block-hmp-cmds.c | 1002 
> > > >  blockdev.c |  137 +
> > > >  device-hotplug.c   |   91 ---
> > > >  hw/usb/dev-storage.c   |1 -
> > > >  include/block/block-hmp-cmds.h |   54 ++
> > > >  include/block/block_int.h  |5 +-
> > > >  include/monitor/hmp.h  |   24 -
> > > >  include/sysemu/blockdev.h  |4 -
> > > >  include/sysemu/sysemu.h|3 -
> > > >  monitor/hmp-cmds.c |  769 
> > > >  monitor/misc.c |1 +
> > > >  15 files changed, 1072 insertions(+), 1024 deletions(-)
> > > >  create mode 100644 block/monitor/Makefile.objs
> > > >  create mode 100644 block/monitor/block-hmp-cmds.c
> > > >  delete mode 100644 device-hotplug.c
> > > >  create mode 100644 include/block/block-hmp-cmds.h
> > > > 
> > > > -- 
> > > > 2.17.2
> > > > 
> > > 
> > > --
> > > Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
> > 
> > 
> 
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK





Re: [PATCH] s390/sclp: improve special wait psw logic

2020-02-20 Thread Cornelia Huck
On Thu, 20 Feb 2020 14:16:22 +0100
Christian Borntraeger  wrote:

> There is a special quiesce PSW that we check for "shutdown". Otherwise
> disabled wait is detected as "crashed". Architecturally we must only check
> PSW bits 116-127. Fix this.
> 
> Cc: qemu-sta...@nongnu.org
> Signed-off-by: Christian Borntraeger 
> ---
>  target/s390x/helper.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Thanks, applied.




Re: [PATCH 13/19] target/arm: Move VLLDM and VLSTM to vfp.decode

2020-02-20 Thread Peter Maydell
On Fri, 14 Feb 2020 at 18:16, Richard Henderson
 wrote:
>
> Now that we no longer have an early check for ARM_FEATURE_VFP,
> we can use the proper ISA check in trans_VLLDM_VLSTM.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/arm/vfp.decode  |  2 ++
>  target/arm/translate-vfp.inc.c | 35 ++
>  target/arm/translate.c | 53 ++
>  3 files changed, 46 insertions(+), 44 deletions(-)
>
> diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
> index a67b3f29ee..592fe9e1e4 100644
> --- a/target/arm/vfp.decode
> +++ b/target/arm/vfp.decode
> @@ -242,3 +242,5 @@ VCVT_sp_int   1110 1.11 110 s:1  1010 rz:1 1.0 
>  \
>   vd=%vd_sp vm=%vm_sp
>  VCVT_dp_int   1110 1.11 110 s:1  1011 rz:1 1.0  \
>   vd=%vd_sp vm=%vm_dp
> +
> +VLLDM_VLSTM  1110 1100 001 l:1 rn:4  1010  
> diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
> index f6f7601fe2..8f2b97e0e7 100644
> --- a/target/arm/translate-vfp.inc.c
> +++ b/target/arm/translate-vfp.inc.c
> @@ -2816,3 +2816,38 @@ static bool trans_VCVT_dp_int(DisasContext *s, 
> arg_VCVT_dp_int *a)
>  tcg_temp_free_ptr(fpst);
>  return true;
>  }
> +
> +/*
> + * Decode VLLDM of VLSTM are nonstandard because:

"Decode of VLLDM and VLSTM"

> + *  * if there is no FPU then these insns must NOP in
> + *Secure state and UNDEF in Nonsecure state
> + *  * if there is an FPU then these insns do not have
> + *the usual behaviour that disas_vfp_insn() provides of
> + *being controlled by CPACR/NSACR enable bits or the
> + *lazy-stacking logic.

s/disas_vfp_insn/vfp_access_check/ (we never updated this
old comment when we moved the logic as part of the decodetree
conversion).

> + */
> +static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
> +{
> +TCGv_i32 fptr;
> +
> +if (!arm_dc_feature(s, ARM_FEATURE_M) ||
> +!arm_dc_feature(s, ARM_FEATURE_V8)) {
> +return false;
> +}
> +if (!dc_isar_feature(aa32_fpsp_v2, s)) {
> +/* No FPU: NOP if secure, otherwise UNDEF.  */
> +return s->v8m_secure;
> +}

We need to UNDEF if !v8m_secure even if there is an FPU.

> +
> +fptr = load_reg(s, a->rn);
> +if (a->l) {
> +gen_helper_v7m_vlldm(cpu_env, fptr);
> +} else {
> +gen_helper_v7m_vlstm(cpu_env, fptr);
> +}
> +tcg_temp_free_i32(fptr);
> +
> +/* End the TB, because we have updated FP control bits */
> +s->base.is_jmp = DISAS_UPDATE;
> +return true;
> +}

thanks
-- PMM



[PATCH] qdev-monitor: Forbid repeated device_del

2020-02-20 Thread Julia Suvorova
Device unplug can be done asynchronously. Thus, sending the second
device_del before the previous unplug is complete may lead to
unexpected results. On PCIe devices, this cancels the hot-unplug
process.

Signed-off-by: Julia Suvorova 
---
 qdev-monitor.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/qdev-monitor.c b/qdev-monitor.c
index 8ce71a206b..8a2a9538cd 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -887,6 +887,12 @@ void qmp_device_del(const char *id, Error **errp)
 {
 DeviceState *dev = find_device_state(id, errp);
 if (dev != NULL) {
+if (dev->pending_deleted_event) {
+error_setg(errp, "Device %s is already in the "
+ "process of unplug", id);
+return;
+}
+
 qdev_unplug(dev, errp);
 }
 }
-- 
2.24.1




  1   2   3   4   >