Re: [Qemu-devel] [PATCH 05/12] migration: show the statistics of compression

2018-06-13 Thread Xiao Guangrong




On 06/14/2018 12:25 AM, Dr. David Alan Gilbert wrote:
 }
  
  static void migration_bitmap_sync(RAMState *rs)

@@ -1412,6 +1441,9 @@ static void flush_compressed_data(RAMState *rs)
  qemu_mutex_lock(&comp_param[idx].mutex);
  if (!comp_param[idx].quit) {
  len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
+/* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
+compression_counters.reduced_size += TARGET_PAGE_SIZE - len + 8;


I think I'd rather save just len+8 rather than the subtraction.


Hmm, is this what you want?
  compression_counters.reduced_size += len - 8;

Then calculate the real reduced size in populate_ram_info() where we return this
info to the user:
  info->compression->reduced_size = compression_counters.pages * PAGE_SIZE 
- compression_counters.reduced_size;

Right?


I think other than that, and Eric's comments, it's OK.



Thanks.



[Qemu-devel] [PATCH 1/7] spapr: Clean up cpu realize/unrealize paths

2018-06-13 Thread David Gibson
spapr_cpu_init() and spapr_cpu_destroy() are only called from the spapr
cpu core realize/unrealize paths, and really can only be called from there.

Those are all short functions, so fold the pairs together for simplicity.
While we're there rename some functions and change some parameter types
for brevity and clarity.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr_cpu_core.c | 69 +++--
 1 file changed, 25 insertions(+), 44 deletions(-)

diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index f3e9b879b2..7fdb3b6667 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -83,26 +83,6 @@ void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong 
nip, target_ulong r
 ppc_store_lpcr(cpu, env->spr[SPR_LPCR] | pcc->lpcr_pm);
 }
 
-static void spapr_cpu_destroy(PowerPCCPU *cpu)
-{
-qemu_unregister_reset(spapr_cpu_reset, cpu);
-}
-
-static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
-   Error **errp)
-{
-CPUPPCState *env = &cpu->env;
-
-/* Set time-base frequency to 512 MHz */
-cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
-
-cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
-kvmppc_set_papr(cpu);
-
-qemu_register_reset(spapr_cpu_reset, cpu);
-spapr_cpu_reset(cpu);
-}
-
 /*
  * Return the sPAPR CPU core type for @model which essentially is the CPU
  * model specified with -cpu cmdline option.
@@ -122,44 +102,47 @@ const char *spapr_get_cpu_core_type(const char *cpu_type)
 return object_class_get_name(oc);
 }
 
-static void spapr_cpu_core_unrealizefn(DeviceState *dev, Error **errp)
+static void spapr_unrealize_vcpu(PowerPCCPU *cpu)
+{
+qemu_unregister_reset(spapr_cpu_reset, cpu);
+object_unparent(cpu->intc);
+cpu_remove_sync(CPU(cpu));
+object_unparent(OBJECT(cpu));
+}
+
+static void spapr_cpu_core_unrealize(DeviceState *dev, Error **errp)
 {
 sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
 CPUCore *cc = CPU_CORE(dev);
 int i;
 
 for (i = 0; i < cc->nr_threads; i++) {
-Object *obj = OBJECT(sc->threads[i]);
-DeviceState *dev = DEVICE(obj);
-CPUState *cs = CPU(dev);
-PowerPCCPU *cpu = POWERPC_CPU(cs);
-
-spapr_cpu_destroy(cpu);
-object_unparent(cpu->intc);
-cpu_remove_sync(cs);
-object_unparent(obj);
+spapr_unrealize_vcpu(sc->threads[i]);
 }
 g_free(sc->threads);
 }
 
-static void spapr_cpu_core_realize_child(Object *child,
- sPAPRMachineState *spapr, Error 
**errp)
+static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+   Error **errp)
 {
+CPUPPCState *env = &cpu->env;
 Error *local_err = NULL;
-CPUState *cs = CPU(child);
-PowerPCCPU *cpu = POWERPC_CPU(cs);
 
-object_property_set_bool(child, true, "realized", &local_err);
+object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
 if (local_err) {
 goto error;
 }
 
-spapr_cpu_init(spapr, cpu, &local_err);
-if (local_err) {
-goto error;
-}
+/* Set time-base frequency to 512 MHz */
+cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
+
+cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
+kvmppc_set_papr(cpu);
 
-cpu->intc = icp_create(child, spapr->icp_type, XICS_FABRIC(spapr),
+qemu_register_reset(spapr_cpu_reset, cpu);
+spapr_cpu_reset(cpu);
+
+cpu->intc = icp_create(OBJECT(cpu), spapr->icp_type, XICS_FABRIC(spapr),
&local_err);
 if (local_err) {
 goto error;
@@ -220,9 +203,7 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error 
**errp)
 }
 
 for (j = 0; j < cc->nr_threads; j++) {
-obj = OBJECT(sc->threads[j]);
-
-spapr_cpu_core_realize_child(obj, spapr, &local_err);
+spapr_realize_vcpu(sc->threads[j], spapr, &local_err);
 if (local_err) {
 goto err;
 }
@@ -249,7 +230,7 @@ static void spapr_cpu_core_class_init(ObjectClass *oc, void 
*data)
 sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_CLASS(oc);
 
 dc->realize = spapr_cpu_core_realize;
-dc->unrealize = spapr_cpu_core_unrealizefn;
+dc->unrealize = spapr_cpu_core_unrealize;
 dc->props = spapr_cpu_core_properties;
 scc->cpu_type = data;
 }
-- 
2.17.1




[Qemu-devel] [PATCH 5/7] pnv: Add cpu unrealize path

2018-06-13 Thread David Gibson
Currently we don't have any unrealize path for pnv cpu cores.  We get away
with this because we don't yet support cpu hotplug for pnv.

However, we're going to want it eventually, and in the meantime, it makes
it non-obvious why there are a bunch of allocations on the realize() path
that don't have matching frees.

So, implement the missing unrealize path.

Signed-off-by: David Gibson 
---
 hw/ppc/pnv_core.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index c9648fd1ad..c70dbbe056 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -192,6 +192,26 @@ err:
 error_propagate(errp, local_err);
 }
 
+static void pnv_unrealize_vcpu(PowerPCCPU *cpu)
+{
+qemu_unregister_reset(pnv_cpu_reset, cpu);
+object_unparent(cpu->intc);
+cpu_remove_sync(CPU(cpu));
+object_unparent(OBJECT(cpu));
+}
+
+static void pnv_core_unrealize(DeviceState *dev, Error **errp)
+{
+PnvCore *pc = PNV_CORE(dev);
+CPUCore *cc = CPU_CORE(dev);
+int i;
+
+for (i = 0; i < cc->nr_threads; i++) {
+pnv_unrealize_vcpu(pc->threads[i]);
+}
+g_free(pc->threads);
+}
+
 static Property pnv_core_properties[] = {
 DEFINE_PROP_UINT32("pir", PnvCore, pir, 0),
 DEFINE_PROP_END_OF_LIST(),
@@ -202,6 +222,7 @@ static void pnv_core_class_init(ObjectClass *oc, void *data)
 DeviceClass *dc = DEVICE_CLASS(oc);
 
 dc->realize = pnv_core_realize;
+dc->unrealize = pnv_core_unrealize;
 dc->props = pnv_core_properties;
 }
 
-- 
2.17.1




[Qemu-devel] [PATCH 3/7] pnv_core: Allocate cpu thread objects individually

2018-06-13 Thread David Gibson
Currently, we allocate space for all the cpu objects within a single core
in one big block.  This was copied from an older version of the spapr code
and requires some ugly pointer manipulation to extract the individual
objects.

This design was due to a misunderstanding of qemu lifetime conventions and
has already been changed in spapr (in 94ad93bd "spapr_cpu_core: instantiate
CPUs separately").

Make an equivalent change in pnv_core to get rid of the nasty pointer
arithmetic.

Signed-off-by: David Gibson 
---
 hw/ppc/pnv.c  |  4 ++--
 hw/ppc/pnv_core.c | 11 +--
 include/hw/ppc/pnv_core.h |  2 +-
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 0314881316..0b9508d94d 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -121,9 +121,9 @@ static int get_cpus_node(void *fdt)
  */
 static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt)
 {
-CPUState *cs = CPU(DEVICE(pc->threads));
+PowerPCCPU *cpu = pc->threads[0];
+CPUState *cs = CPU(cpu);
 DeviceClass *dc = DEVICE_GET_CLASS(cs);
-PowerPCCPU *cpu = POWERPC_CPU(cs);
 int smt_threads = CPU_CORE(pc)->nr_threads;
 CPUPPCState *env = &cpu->env;
 PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index efb68226bb..59309e149c 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -151,7 +151,6 @@ static void pnv_core_realize(DeviceState *dev, Error **errp)
 PnvCore *pc = PNV_CORE(OBJECT(dev));
 CPUCore *cc = CPU_CORE(OBJECT(dev));
 const char *typename = pnv_core_cpu_typename(pc);
-size_t size = object_type_get_instance_size(typename);
 Error *local_err = NULL;
 void *obj;
 int i, j;
@@ -165,11 +164,11 @@ static void pnv_core_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-pc->threads = g_malloc0(size * cc->nr_threads);
+pc->threads = g_new(PowerPCCPU *, cc->nr_threads);
 for (i = 0; i < cc->nr_threads; i++) {
-obj = pc->threads + i * size;
+obj = object_new(typename);
 
-object_initialize(obj, size, typename);
+pc->threads[i] = POWERPC_CPU(obj);
 
 snprintf(name, sizeof(name), "thread[%d]", i);
 object_property_add_child(OBJECT(pc), name, obj, &local_err);
@@ -185,7 +184,7 @@ static void pnv_core_realize(DeviceState *dev, Error **errp)
 }
 
 for (j = 0; j < cc->nr_threads; j++) {
-obj = pc->threads + j * size;
+obj = OBJECT(pc->threads[j]);
 
 pnv_core_realize_child(obj, XICS_FABRIC(xi), &local_err);
 if (local_err) {
@@ -200,7 +199,7 @@ static void pnv_core_realize(DeviceState *dev, Error **errp)
 
 err:
 while (--i >= 0) {
-obj = pc->threads + i * size;
+obj = OBJECT(pc->threads[i]);
 object_unparent(obj);
 }
 g_free(pc->threads);
diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
index e337af7a3a..447ae761f7 100644
--- a/include/hw/ppc/pnv_core.h
+++ b/include/hw/ppc/pnv_core.h
@@ -34,7 +34,7 @@ typedef struct PnvCore {
 CPUCore parent_obj;
 
 /*< public >*/
-void *threads;
+PowerPCCPU **threads;
 uint32_t pir;
 
 MemoryRegion xscom_regs;
-- 
2.17.1




[Qemu-devel] [PATCH 7/7] target/ppc, spapr: Move VPA information to machine_data

2018-06-13 Thread David Gibson
CPUPPCState currently contains a number of fields containing the state of
the VPA.  The VPA is a PAPR specific concept covering several guest/host
shared memory areas used to communicate some information with the
hypervisor.

As a PAPR concept this is really machine specific information, although it
is per-cpu, so it doesn't really belong in the core CPU state structure.
So, move it to the PAPR specific 'machine_data' structure.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr_cpu_core.c |  7 +++
 hw/ppc/spapr_hcall.c| 77 ++---
 include/hw/ppc/spapr_cpu_core.h |  3 ++
 target/ppc/cpu.h|  6 ---
 target/ppc/translate_init.inc.c |  8 
 5 files changed, 52 insertions(+), 49 deletions(-)

diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 544bda93e2..f642c95967 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -28,6 +28,7 @@ static void spapr_cpu_reset(void *opaque)
 CPUState *cs = CPU(cpu);
 CPUPPCState *env = &cpu->env;
 PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
 target_ulong lpcr;
 
 cpu_reset(cs);
@@ -69,6 +70,12 @@ static void spapr_cpu_reset(void *opaque)
 
 /* Set a full AMOR so guest can use the AMR as it sees fit */
 env->spr[SPR_AMOR] = 0xffffffffffffffffull;
+
+spapr_cpu->vpa_addr = 0;
+spapr_cpu->slb_shadow_addr = 0;
+spapr_cpu->slb_shadow_size = 0;
+spapr_cpu->dtl_addr = 0;
+spapr_cpu->dtl_size = 0;
 }
 
 void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip, target_ulong 
r3)
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 8b9a4b577f..ae913d070f 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -8,6 +8,7 @@
 #include "exec/exec-all.h"
 #include "helper_regs.h"
 #include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_cpu_core.h"
 #include "mmu-hash64.h"
 #include "cpu-models.h"
 #include "trace.h"
@@ -908,9 +909,11 @@ unmap_out:
 #define VPA_SHARED_PROC_OFFSET 0x9
 #define VPA_SHARED_PROC_VAL0x2
 
-static target_ulong register_vpa(CPUPPCState *env, target_ulong vpa)
+static target_ulong register_vpa(PowerPCCPU *cpu, target_ulong vpa)
 {
-CPUState *cs = CPU(ppc_env_get_cpu(env));
+CPUState *cs = CPU(cpu);
+CPUPPCState *env = &cpu->env;
+sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
 uint16_t size;
 uint8_t tmp;
 
@@ -935,32 +938,34 @@ static target_ulong register_vpa(CPUPPCState *env, 
target_ulong vpa)
 return H_PARAMETER;
 }
 
-env->vpa_addr = vpa;
+spapr_cpu->vpa_addr = vpa;
 
-tmp = ldub_phys(cs->as, env->vpa_addr + VPA_SHARED_PROC_OFFSET);
+tmp = ldub_phys(cs->as, spapr_cpu->vpa_addr + VPA_SHARED_PROC_OFFSET);
 tmp |= VPA_SHARED_PROC_VAL;
-stb_phys(cs->as, env->vpa_addr + VPA_SHARED_PROC_OFFSET, tmp);
+stb_phys(cs->as, spapr_cpu->vpa_addr + VPA_SHARED_PROC_OFFSET, tmp);
 
 return H_SUCCESS;
 }
 
-static target_ulong deregister_vpa(CPUPPCState *env, target_ulong vpa)
+static target_ulong deregister_vpa(PowerPCCPU *cpu, target_ulong vpa)
 {
-if (env->slb_shadow_addr) {
+sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
+
+if (spapr_cpu->slb_shadow_addr) {
 return H_RESOURCE;
 }
 
-if (env->dtl_addr) {
+if (spapr_cpu->dtl_addr) {
 return H_RESOURCE;
 }
 
-env->vpa_addr = 0;
+spapr_cpu->vpa_addr = 0;
 return H_SUCCESS;
 }
 
-static target_ulong register_slb_shadow(CPUPPCState *env, target_ulong addr)
+static target_ulong register_slb_shadow(PowerPCCPU *cpu, target_ulong addr)
 {
-CPUState *cs = CPU(ppc_env_get_cpu(env));
+sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
 uint32_t size;
 
 if (addr == 0) {
@@ -968,7 +973,7 @@ static target_ulong register_slb_shadow(CPUPPCState *env, 
target_ulong addr)
 return H_HARDWARE;
 }
 
-size = ldl_be_phys(cs->as, addr + 0x4);
+size = ldl_be_phys(CPU(cpu)->as, addr + 0x4);
 if (size < 0x8) {
 return H_PARAMETER;
 }
@@ -977,26 +982,28 @@ static target_ulong register_slb_shadow(CPUPPCState *env, 
target_ulong addr)
 return H_PARAMETER;
 }
 
-if (!env->vpa_addr) {
+if (!spapr_cpu->vpa_addr) {
 return H_RESOURCE;
 }
 
-env->slb_shadow_addr = addr;
-env->slb_shadow_size = size;
+spapr_cpu->slb_shadow_addr = addr;
+spapr_cpu->slb_shadow_size = size;
 
 return H_SUCCESS;
 }
 
-static target_ulong deregister_slb_shadow(CPUPPCState *env, target_ulong addr)
+static target_ulong deregister_slb_shadow(PowerPCCPU *cpu, target_ulong addr)
 {
-env->slb_shadow_addr = 0;
-env->slb_shadow_size = 0;
+sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
+
+spapr_cpu->slb_shadow_addr = 0;
+spapr_cpu->slb_shadow_size = 0;
 return H_SUCCESS;
 }
 
-static target_ulong register_dtl(CPUPPCState *env, target_ulong addr)
+static target_ulong register_dtl(PowerPCCPU *cpu, target_ulong addr)
 {

[Qemu-devel] [PATCH 6/7] target/ppc: Replace intc pointer with a general machine_data pointer

2018-06-13 Thread David Gibson
PowerPCCPU contains an (Object *)intc used to point to the cpu's interrupt
controller.  Or more precisely to the "presentation" component of the
interrupt controller relevant to this cpu.

Really, this field is machine specific.  The machines which use it can
point it to different types of object depending on their needs, and most
machines don't use it at all (since they have older style PICs which don't
have per-cpu presentation components).

There's also other information that's per-cpu, but platform/machine
specific.  So replace the intc pointer with a (void *)machine_data which
can be managed as the machine type likes to conveniently store per cpu
information.

Signed-off-by: David Gibson 
---
 hw/intc/xics.c  |  5 +++--
 hw/intc/xics_spapr.c| 16 +++-
 hw/ppc/pnv.c|  4 ++--
 hw/ppc/pnv_core.c   | 11 +--
 hw/ppc/spapr.c  |  8 
 hw/ppc/spapr_cpu_core.c | 13 ++---
 include/hw/ppc/pnv_core.h   |  9 +
 include/hw/ppc/spapr_cpu_core.h | 10 ++
 include/hw/ppc/xics.h   |  4 ++--
 target/ppc/cpu.h|  2 +-
 10 files changed, 61 insertions(+), 21 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index e73e623e3b..689ad44e5f 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -383,7 +383,8 @@ static const TypeInfo icp_info = {
 .class_size = sizeof(ICPStateClass),
 };
 
-Object *icp_create(Object *cpu, const char *type, XICSFabric *xi, Error **errp)
+ICPState *icp_create(Object *cpu, const char *type, XICSFabric *xi,
+ Error **errp)
 {
 Error *local_err = NULL;
 Object *obj;
@@ -401,7 +402,7 @@ Object *icp_create(Object *cpu, const char *type, 
XICSFabric *xi, Error **errp)
 obj = NULL;
 }
 
-return obj;
+return ICP(obj);
 }
 
 /*
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index 2e27b92b87..01c76717cf 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -31,6 +31,7 @@
 #include "trace.h"
 #include "qemu/timer.h"
 #include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_cpu_core.h"
 #include "hw/ppc/xics.h"
 #include "hw/ppc/fdt.h"
 #include "qapi/visitor.h"
@@ -43,8 +44,9 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState 
*spapr,
target_ulong opcode, target_ulong *args)
 {
 target_ulong cppr = args[0];
+sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
 
-icp_set_cppr(ICP(cpu->intc), cppr);
+icp_set_cppr(spapr_cpu->icp, cppr);
 return H_SUCCESS;
 }
 
@@ -65,7 +67,8 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState 
*spapr,
 static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
 {
-uint32_t xirr = icp_accept(ICP(cpu->intc));
+sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
+uint32_t xirr = icp_accept(spapr_cpu->icp);
 
 args[0] = xirr;
 return H_SUCCESS;
@@ -74,7 +77,8 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState 
*spapr,
 static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
  target_ulong opcode, target_ulong *args)
 {
-uint32_t xirr = icp_accept(ICP(cpu->intc));
+sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
+uint32_t xirr = icp_accept(spapr_cpu->icp);
 
 args[0] = xirr;
 args[1] = cpu_get_host_ticks();
@@ -84,9 +88,10 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
 static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
   target_ulong opcode, target_ulong *args)
 {
+sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
 target_ulong xirr = args[0];
 
-icp_eoi(ICP(cpu->intc), xirr);
+icp_eoi(spapr_cpu->icp, xirr);
 return H_SUCCESS;
 }
 
@@ -94,7 +99,8 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
 target_ulong opcode, target_ulong *args)
 {
 uint32_t mfrr;
-uint32_t xirr = icp_ipoll(ICP(cpu->intc), &mfrr);
+sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
+uint32_t xirr = icp_ipoll(spapr_cpu->icp, &mfrr);
 
 args[0] = xirr;
 args[1] = mfrr;
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 0b9508d94d..3a36c6ac6a 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1013,7 +1013,7 @@ static ICPState *pnv_icp_get(XICSFabric *xi, int pir)
 {
 PowerPCCPU *cpu = ppc_get_vcpu_by_pir(pir);
 
-return cpu ? ICP(cpu->intc) : NULL;
+return cpu ? pnv_cpu_state(cpu)->icp : NULL;
 }
 
 static void pnv_pic_print_info(InterruptStatsProvider *obj,
@@ -1026,7 +1026,7 @@ static void pnv_pic_print_info(InterruptStatsProvider 
*obj,
 CPU_FOREACH(cs) {
 PowerPCCPU *cpu = POWERPC_CPU(cs);
 
-icp_pic_print_info(ICP(cpu->intc), mon);
+icp_pic_print_info(pnv_cpu_state(cpu)->icp, mon);
 }
 
 for (i = 0; i < pnv->num_chips; i++) {
diff 

Re: [Qemu-devel] [PATCH v2] pnv: add a physical mapping array describing MMIO ranges in each chip

2018-06-13 Thread Cédric Le Goater
On 06/13/2018 02:47 AM, David Gibson wrote:
> On Tue, Jun 12, 2018 at 08:13:49AM +0200, Cédric Le Goater wrote:
>> On 06/12/2018 07:58 AM, David Gibson wrote:
>>> On Wed, Jun 06, 2018 at 09:04:10AM +0200, Cédric Le Goater wrote:
 On 06/06/2018 08:39 AM, David Gibson wrote:
> On Wed, May 30, 2018 at 12:07:54PM +0200, Cédric Le Goater wrote:
>> Based on previous work done in skiboot, the physical mapping array
>> helps in calculating the MMIO ranges of each controller depending on
>> the chip id and the chip type. This will be particularly useful for
>> the P9 models which use the XSCOM bus less and rely more on MMIOs.
>>
>> A link on the chip is now necessary to calculate MMIO BARs and
>> sizes. This is why such a link is introduced in the PSIHB model.
>
> I think this message needs some work.  This says what it's for, but
> what actually *is* this array, and how does it work?

 OK. It is relatively simple: each controller has an entry defining its 
 MMIO range. 

> The outside-core differences between POWER8 and POWER9 are substantial
> enough that I'm wondering if pnvP8 and pnvP9 would be better off as
> different machine types (sharing some routines, of course).

 yes and no. I have survived using a common PnvChip framework but
 it is true that I had to add P9 classes for each: LPC, PSI, OCC 
 They are very similar but not enough. P9 uses much more MMIOs than 
 P8 which still uses a lot of XSCOM. I haven't looked at PHB4. 
>>>
>>> Well, it's certainly *possible* to use the same machine type, I'm just
>>> not convinced it's a good idea.  It seems kind of dodgy to me that so
>>> many peripherals on the system change as a side-effect of setting the
>>> cpu.  Compare to how x86 works where cpu really does change the CPU,
>>> plugging it into the same virtual "chipset".  Different chipsets *are*
>>> different machine types there (pc vs. q35).
>>
>> OK, I agree, and we can use a set of common routines to instantiate the 
>> different chipset models. 
>>
>> So we would have a common pnv_init() routine to initialize the different 
>> 'powernv8' and 'powernv9' machines and the PnvChip typename would be a 
>> machine class attribute ?
> 
> Well.. that's one option.  Usually for these things, it works out
> better to instead of parameterizing big high-level routines like
> pnv_init(), you have separate versions of those calling a combination
> of case-specific and common routines as necessary.
> 
> Mostly it just comes down to what is simplest to implement for you, though.

I am introducing a powernv8 machine, the machine init routine is still
generic and did not change much. But I have deepened the PnvChip class
inheritance hierarchy with an intermediate class taking care of the
Chip sub controllers, which gives us something like :

  pnv_init()
. skiboot
. kernel
. initrd
. chips creation
. platform bus/device :
   isa bus
   pci layout
   bmc handling.

  p8 chip hierarchy:
 
   power8_v2.0-pnv-chip (gives the cpu type)
   pnv8-chip   : creates the devices only   
   pnv-chip: creates xscom and the cores 

The powernv9 machine has this hierarchy :

   power9_v2.0-pnv-chip
   pnv9-chip
   pnv-chip

I had to introduce these new PnvChipClass ops : 

void (*realize)(PnvChip *chip, Error **errp);
Object *(*intc_create)(PnvChip *chip, Object *child, Error **errp);
ISABus *(*isa_create)(PnvChip *chip);

Overall, it's looking fine and it should remove most of these tests :

 pnv_chip_is_power9(chip)

If not, it means we are missing a PnvChipClass ops anyway.

I will send a patchset this week, the final one will shuffle quite a
bit of code and the resulting diff will be a bit fuzzy. You will have
to trust me on this one.

 
>> Nevertheless we would still need to introduce "a physical mapping array 
>> describing MMIO ranges" but we can start by differentiating the chipsets 
>> first.
> 
> Well, maybe.  I'm wondering if you can more easily encapsulate the
> information in that array in a top-level init routine, that calls
> common helpers with different addresses / device types / whatever.

Hmm, I think I understand, but could you give me a prototype example, 
please? Just to make sure.

I would like to keep the array somewhere because, in a quick look, 
it gives you an overview of the POWER Processor address space. 

Thanks,

C.
 




Re: [Qemu-devel] [PATCH v2] CODING_STYLE: Define our preferred form for multiline comments

2018-06-13 Thread Cornelia Huck
On Tue, 12 Jun 2018 12:47:29 -0600
Alex Williamson  wrote:

> On Tue, 12 Jun 2018 20:12:02 +0200
> Thomas Huth  wrote:
> 
> > On 12.06.2018 19:30, Richard Henderson wrote:  
> > > On 06/11/2018 04:17 AM, Peter Maydell wrote:
> > >> The codebase has a bit of a mix of different multiline
> > >> comment styles. State a preference for the Linux kernel
> > >> style:
> > >> /*
> > >>  * Star on the left for each line.
> > >>  * Leading slash-star and trailing star-slash
> > >>  * each go on a line of their own.
> > >>  */
> > >>
> > >> Signed-off-by: Peter Maydell 
> > >> ---
> > >> This is not my personal favourite, but seemed to be the
> > >> closest we had to consensus in the mail thread for v1;
> > >> I can live with it in order to avoid getting patches which
> > >> use the styles I like even less :-)
> > > 
> > > Honestly, I don't like this except for "important" comments.
> > > 
> > > A "small" comment, e.g. one that doesn't quite fit on a single line, now 
> > > takes
> > > 4 lines instead of 2.  Which is really annoying and IMO tends to break 
> > > flow.
> > > 
> > > If you don't like
> > > 
> > >   /* gnu
> > >  style */
> > > 
> > > or
> > > 
> > >   /* whatever
> > >* this is */
> > > 
> > > could you live with
> > > 
> > >   // c99/c++
> > >   // comments
> > 
> > FWIW:
> > 
> > +1 for one or two of those compact styles for two- or three-line comments.  
> 
> or four-line or single paragraphs or comments that rhyme with
> "orange"...  If the comment is too large for a single line, then take
> some time to say it more concisely, or maybe it does deserve enough
> thought to frame a nice paragraph for it.  We're well into personal
> style here, so either this is important enough to make some people
> unhappy or we should leave it to maintainer preference and consistency
> within a file/area.  Thanks,

+1 to maintainer judgment and consistency.

(And FWIW, c++ style comments are what I use when I comment something
out for debugging, so I don't like them in regular code :)



Re: [Qemu-devel] [PATCH v2 2/4] vmdk: Implement .bdrv_co_create callback

2018-06-13 Thread Markus Armbruster
Still only looking at QAPI-related aspects.

Fam Zheng  writes:

> This makes VMDK support x-blockdev-create. The implementation reuses the
> image creation code in vmdk_co_create_opts which now accepts a callback
> pointer to "retrieve" BlockBackend pointers from the caller. This way we
> separate the logic between file/extent acquisition and initialization.
>
> The QAPI command parameters are mostly the same as the old create_opts
> except the dropped legacy @compat6 switch, which is redundant with
> @hwversion.
>
> Signed-off-by: Fam Zheng 
> ---
>  block/vmdk.c  | 461 
> --
>  qapi/block-core.json  |  67 +++-
>  qapi/qapi-schema.json |   1 +
>  3 files changed, 399 insertions(+), 130 deletions(-)
>
> diff --git a/block/vmdk.c b/block/vmdk.c
> index 083942f806..ae121b36e0 100644
> --- a/block/vmdk.c
> +++ b/block/vmdk.c
> @@ -1905,38 +1905,68 @@ static int filename_decompose(const char *filename, 
> char *path, char *prefix,
>  return VMDK_OK;
>  }
>  
> -static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts 
> *opts,
> -Error **errp)
> +/*
> + * idx == 0: get or create the descriptor file (also the image file if in a
> + *   non-split format.
> + * idx >= 1: get the n-th extent if in a split subformat
> + */
> +typedef BlockBackend *(*vmdk_create_extent_fn)(int64_t size,
> +   int idx,
> +   bool flat,
> +   bool split,
> +   bool compress,
> +   bool zeroed_grain,
> +   void *opaque,
> +   Error **errp);
> +
> +static void vmdk_desc_add_extent(GString *desc,
> + const char *extent_line_fmt,
> + int64_t size, const char *filename)
>  {
> -int idx = 0;
> -BlockBackend *new_blk = NULL;
> +char *desc_line = g_malloc0(BUF_SIZE);
> +const char *basename = strrchr(filename, '/');
> +if (!basename) {
> +basename = filename;
> +} else {
> +basename += 1;
> +}

g_path_get_basename()?

> +snprintf(desc_line, BUF_SIZE, extent_line_fmt,
> + DIV_ROUND_UP(size, BDRV_SECTOR_SIZE),
> + basename);
> +g_string_append(desc, desc_line);
> +g_free(desc_line);

g_string_append_printf()?

> +}
> +
> +static int coroutine_fn vmdk_co_do_create(int64_t size,
> +  BlockdevVmdkSubformat subformat,
> +  BlockdevVmdkAdapterType 
> adapter_type,
> +  const char *backing_file,
> +  const char *hw_version,
> +  bool compat6,
> +  bool zeroed_grain,
> +  vmdk_create_extent_fn extent_fn,
> +  void *opaque,
> +  Error **errp)
> +{
> +int extent_idx;
> +BlockBackend *blk = NULL;
>  Error *local_err = NULL;
>  char *desc = NULL;
> -int64_t total_size = 0, filesize;
> -char *adapter_type = NULL;
> -char *backing_file = NULL;
> -char *hw_version = NULL;
> -char *fmt = NULL;
>  int ret = 0;
>  bool flat, split, compress;
>  GString *ext_desc_lines;
> -char *path = g_malloc0(PATH_MAX);
> -char *prefix = g_malloc0(PATH_MAX);
> -char *postfix = g_malloc0(PATH_MAX);
> -char *desc_line = g_malloc0(BUF_SIZE);
> -char *ext_filename = g_malloc0(PATH_MAX);
> -char *desc_filename = g_malloc0(PATH_MAX);
>  const int64_t split_size = 0x8000;  /* VMDK has constant split size 
> */
> -const char *desc_extent_line;
> +int64_t extent_size;
> +int64_t created_size = 0;
> +const char *extent_line_fmt;
>  char *parent_desc_line = g_malloc0(BUF_SIZE);
>  uint32_t parent_cid = 0xffffffff;
>  uint32_t number_heads = 16;
> -bool zeroed_grain = false;
>  uint32_t desc_offset = 0, desc_len;
>  const char desc_template[] =
>  "# Disk DescriptorFile\n"
>  "version=1\n"
> -"CID=%" PRIx32 "\n"
> +"CID=%08" PRIx32 "\n"

Didn't you want to back out this one?

>  "parentCID=%" PRIx32 "\n"
>  "createType=\"%s\"\n"
>  "%s"
> @@ -1955,71 +1985,35 @@ static int coroutine_fn vmdk_co_create_opts(const 
> char *filename, QemuOpts *opts
>  
>  ext_desc_lines = g_string_new(NULL);
>  
> -if (filename_decompose(filename, path, prefix, postfix, PATH_MAX, errp)) 
> {
> -ret = -EINVAL;
> -goto exit;
> -}
>  /* Read out options */
> -total_size = R

Re: [Qemu-devel] [PATCH 1/4] spapr: remove irq_hint parameter from spapr_irq_alloc()

2018-06-13 Thread Cédric Le Goater
On 06/13/2018 06:22 AM, David Gibson wrote:
> On Tue, Jun 05, 2018 at 08:41:13AM +0200, Cédric Le Goater wrote:
>> On 06/05/2018 05:34 AM, David Gibson wrote:
>>> On Mon, May 28, 2018 at 09:06:12AM +0200, Cédric Le Goater wrote:
 On 05/28/2018 08:17 AM, Thomas Huth wrote:
> On 25.05.2018 16:02, Greg Kurz wrote:
>> On Fri, 18 May 2018 18:44:02 +0200
>> Cédric Le Goater  wrote:
>>
>>> This IRQ number hint can possibly be used by the VIO devices if the
>>> "irq" property is defined on the command line but it seems it is never
>>> the case. It is not used in libvirt for instance. So, let's remove it
>>> to simplify future changes.
>>>
>>
>> Setting an irq manually looks a bit anachronistic. I doubt anyone would
>> do that nowadays, and the patch does a nice cleanup. So this looks like
>> a good idea.
> [...]
>>> diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
>>> index 472dd6f33a96..cc064f64fccf 100644
>>> --- a/hw/ppc/spapr_vio.c
>>> +++ b/hw/ppc/spapr_vio.c
>>> @@ -455,7 +455,7 @@ static void spapr_vio_busdev_realize(DeviceState 
>>> *qdev, Error **errp)
>>>  dev->qdev.id = id;
>>>  }
>>>  
>>> -dev->irq = spapr_irq_alloc(spapr, dev->irq, false, &local_err);
>>> +dev->irq = spapr_irq_alloc(spapr, false, &local_err);
>>
>> Silently breaking "irq" like this looks wrong. I'd rather officially 
>> remove
>> it first (ie, kill spapr_vio_props, -5 lines in spapr_vio.c).
>>
>> Of course, this raises the question of interface deprecation, and it 
>> should
>> theoretically follow the process described at:
>>
>> https://wiki.qemu.org/Features/LegacyRemoval#Rules_for_removing_an_interface
>>
>> Cc'ing Thomas, our Chief Deprecation Officer, for insights :)
>
> The property is a public interface. Just because it's not used by
> libvirt does not mean that nobody is using it. So yes, please follow the
> rules and mark it as deprecated first for two releases, before you really
> remove it.

 This "irq" property is a problem to introduce a new static layout of IRQ 
 numbers. It is in complete opposition. 

 Can we keep it as it is for old pseries machine (settable) and ignore it 
 for newer ? Would that be fine ?
>>>
>>> So, Thomas is right that we need to keep the interface while we go
>>> through the deprecation process, even though it's a bit of a pain
>>> (like you, I seriously doubt anyone ever used it).
>>
>> That's OK. The patch is simple. But it means that we have to keep the 
>> irq_hint parameter for 2 QEMU versions.
> 
> No.. the suggestion below is designed to avoid that..
> 
>>> But, I think there's a way to avoid that getting in the way of your
>>> cleanups too much.
>>>
>>> A bunch of the current problems are caused because spapr_irq_alloc()
>>> conflates two meanings of "allocate": 1) finding a free irq to use for
>>> this device and 2) assigning that irq exclusively to this device.
>>>
>>> I think the first thing to do is to split those two parts.  (1) will
>>> never take an irq parameter, (2) will always take an irq parameter.
>>> To implement the (to be deprecated) "irq" property on vio devices you
>>> should skip (1) and just call (2) with the given irq number.
>>
>> well, we need to call both because if "irq" is zero then we 
>> fall back to "1) finding a free irq to use."
> 
> No, basically in the VIO code itself you'd have:
>   irq = <value of the "irq" property>;
>   if (!irq)
>   irq = find_irq()
>   claim_irq(irq);
>
> find_irq() never takes a hint, claim_irq() always does (except it's
> not really a hint).

OK. I had something like that in mind:
 
if (dev->irq) {
spapr_irq_assign(spapr, SPAPR_IRQ_VIO, dev->irq, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
} else {
dev->irq = spapr_irq_alloc(spapr, SPAPR_IRQ_VIO, vio_index++,  
   &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}

and spapr_irq_assign() would die when the vio "irq" property does.

>> But we can move the exclusive IRQ assignment (2) under the VIO model 
>> which is the only one using it and start deprecating the property.
> 
> No.. the exclusive claim would be global - everything would use that.

Yes, I see the model. I am not sure it's useful to have two routines
in the long term.

>>> The point of this series is to basically get rid of (1), but this
>>> first step means we don't need to worry about the hint parameter as we
>>> gradually remove it.
>>
>> OK. I think I got what you are asking for. (2) means adding an extra 
>> handler to the sPAPR IRQ interface, which would always fail in the
>> new XICS sPAPR IRQ backend using static numbers.
> 
> No.. (2), "claim_irq()" as I called it above, would _always_ be used.
> find_irq() 

Re: [Qemu-devel] [PULL 00/33] ppc-for-3.0 queue 20180612

2018-06-13 Thread Greg Kurz
On Wed, 13 Jun 2018 09:31:40 +1000
David Gibson  wrote:

> On Tue, Jun 12, 2018 at 08:46:03PM +0200, Greg Kurz wrote:
> > On Tue, 12 Jun 2018 16:55:08 +0100
> > Peter Maydell  wrote:
> >   
> > > On 12 June 2018 at 16:52, Greg Kurz  wrote:  
> > > > On Tue, 12 Jun 2018 15:34:22 +0100
> > > > Peter Maydell  wrote:
> > > >
> > > >> On 12 June 2018 at 07:44, David Gibson  
> > > >> wrote:
> > > >> > 
> > > >> > ppc patch queue 2018-06-12
> > > >> >
> > > >> > Here's another batch of ppc patches towards the 3.0 release.  There's
> > > >> > a fair bit here, because I've been working through my mail backlog
> > > >> > after a holiday.  There's not much of a central theme, amongst other
> > > >> > things we have:
> > > >> > * ppc440 / sam460ex improvements
> > > >> > * logging and error cleanups
> > > >> > * 40p (PReP) bugfixes
> > > >> > * Macintosh fixes and cleanups
> > > >> > * Add emulation of the new POWER9 store-forwarding barrier
> > > >> >   instruction variant
> > > >> > * Hotplug cleanups
> > > >> >
> > > >>
> > > >> Applied, thanks.
> > >   
> > > > Oh... so we've broken bisect until Cedric's fix is merged.
> > > 
> > > If you want me not to merge a pullreq you need to follow up to
> > > the cover letter saying "please don't merge this" or something
> > > similar. Otherwise I will not notice.  
> > 
> > Yeah I've only answered to the offending patch, sorry...  
> 
> And it was the middle of the night for me.
> 
> It's only with pretty old gcc, I think we can live with it, I intend
> to send another pullreq with the fixup as soon as I can.
> 

It was meeting day for me yesterday so I didn't have time to check with
other GCC versions, but I could finally do it this morning. They fail
all the same:

hw/intc/xics_kvm.c: In function ‘ics_set_kvm_state’:
hw/intc/xics_kvm.c:281:20: error: ‘ret’ may be used uninitialized in this
 function [-Werror=maybe-uninitialized]
 return ret;

Fedora28:

$ cc --version
cc (GCC) 8.1.1 20180502 (Red Hat 8.1.1-1)

Ubuntu 18.04:

$ cc --version
cc (Ubuntu 7.3.0-16ubuntu3) 7.3.0

Ubuntu 17.10:

$ cc --version
cc (Ubuntu 7.2.0-8ubuntu3.2) 7.2.0

RHEL 7.5:

$ cc --version
cc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-28)

My concern is more that it requires --enable-debug to be passed to
configure. As noted by Cedric in another mail, -O2 seems to prevent
GCC from detecting this obvious error...



pgpoS6AQFZOTH.pgp
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH 2/4] sparp_pci: simplify how the PCI LSIs are allocated

2018-06-13 Thread Cédric Le Goater
On 06/13/2018 06:27 AM, David Gibson wrote:
> On Tue, Jun 05, 2018 at 08:31:49AM +0200, Cédric Le Goater wrote:
>> On 06/05/2018 05:44 AM, David Gibson wrote:
>>> On Sat, May 26, 2018 at 11:40:23AM +0200, Greg Kurz wrote:
 On Fri, 18 May 2018 18:44:03 +0200
 Cédric Le Goater  wrote:

> PCI LSIs are today allocated one by one using the IRQ alloc_block
> routine. Change the code sequence to first allocate a PCI_NUM_PINS
> block. It will help us providing a generic IRQ framework to the
> machine.
>
> Signed-off-by: Cédric Le Goater 
> ---
>  hw/ppc/spapr_pci.c | 21 ++---
>  1 file changed, 10 insertions(+), 11 deletions(-)
>
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 39a14980d397..4fd97ffe4c6e 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -1546,6 +1546,8 @@ static void spapr_phb_realize(DeviceState *dev, 
> Error **errp)
>  sPAPRTCETable *tcet;
>  const unsigned windows_supported =
>  sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
> +uint32_t irq;
> +Error *local_err = NULL;
>  
>  if (!spapr) {
>  error_setg(errp, TYPE_SPAPR_PCI_HOST_BRIDGE " needs a pseries 
> machine");
> @@ -1694,18 +1696,15 @@ static void spapr_phb_realize(DeviceState *dev, 
> Error **errp)
>  QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
>  
>  /* Initialize the LSI table */
> -for (i = 0; i < PCI_NUM_PINS; i++) {
> -uint32_t irq;
> -Error *local_err = NULL;
> -
> -irq = spapr_irq_alloc_block(spapr, 1, true, false, &local_err);
> -if (local_err) {
> -error_propagate(errp, local_err);
> -error_prepend(errp, "can't allocate LSIs: ");
> -return;
> -}
> +irq = spapr_irq_alloc_block(spapr, PCI_NUM_PINS, true, false, 
> &local_err);
> +if (local_err) {
> +error_propagate(errp, local_err);
> +error_prepend(errp, "can't allocate LSIs: ");
> +return;
> +}
>  

 It isn't strictly equivalent. The current code would be happy with
 sparse IRQ numbers, while the proposed one wouldn't... Anyway, this
 cannot happen since we don't have PHB hotplug.
>>>
>>> This makes me pretty nervous, because it's not obvious it will come up
>>> with the same numbers in all circumstances, which we have to do for
>>> existing machine types.
>>
>> Given that : 
>>
>>  - irq_hint is "unused"
>>  - all IRQs are allocated sequentially at machine init,  
>>  - spapr_pci is the only model using the block allocation for MSIs, 
>>potentially fragmenting more the IRQ number space but done at 
>>guest runtime. 
>>  - the PHB LSIs are allocated at realize time by the loop above, 
>>  - we don't support PHB hotplug 
>>  - we do support PHB coldplug but then the IRQ allocation is done
>>at machine time,
>>
>> it seems highly improbable that the IRQ number space is fragmented
>> to a point which would not allow the loop above to return four 
>> contiguous IRQ numbers, always.
> 
> Well, assuming irq_hint really is unused, that's right.  But we can't
> assume that - that's the whole point of the deprecation thing.
> 
> Given that, AIUI, just one vio device with irq= set to a value that
> would be within an LSI block allocated under the old scheme would
> result in the new scheme returning a non-contiguous set of LSIs -
> i.e. a different result from what we used to have.
> 
>> That is why I felt confident changing the loop to a single block 
>> allocation. 
>>
>>> It's also not obvious to me why it's useful
>>> to go via this step before going straight to static allocation of the
>>> irq numbers.
>>
>> It pollutes the new sPAPR IRQ interface API with an extra parameter 
>> to support both underlying backend and it complexifies the code 
>> to handle block allocation of a single IRQ (like above) within an 
>> IRQ range (the PCI LSIs).
>>
>> So you end up having a family, a device index, a count, an alignment,
>> and an index within the range. pffut.
>>
>> Also, could we kill the alignment ?
> 
> Since we sometimes pass 'true', no, we can't, without changing the
> existing pattern of allocations, which we can't do.

To be honest, this is very much discouraging. 

C. 




Re: [Qemu-devel] [PATCH v5] cutils: Provide strchrnul

2018-06-13 Thread Greg Kurz
On Wed, 13 Jun 2018 07:42:57 +0200
Markus Armbruster  wrote:

> Keno Fischer  writes:
> 
> > strchrnul is a GNU extension and thus unavailable on a number of targets.
> > In the review for a commit removing strchrnul from 9p, I was asked to
> > create a qemu_strchrnul helper to factor out this functionality.
> > Do so, and use it in a number of other places in the code base that inlined
> > the replacement pattern in a place where strchrnul could be used.
> >
> > Signed-off-by: Keno Fischer 
> > Acked-by: Greg Kurz   
> 
> Reviewed-by: Markus Armbruster 

Maybe I can take this through my 9p-next tree since it is a pre-requisite
for the Darwin support series ?

Keno,

BTW, if you want Darwin support to go in the next QEMU release, please
pay attention that I have to send a pull req for it before 2018-07-03.

https://wiki.qemu.org/Planning/3.0

Cheers,

--
Greg



Re: [Qemu-devel] [PATCH 3/4] spapr: introduce a generic IRQ frontend to the machine

2018-06-13 Thread Cédric Le Goater
On 06/13/2018 07:00 AM, David Gibson wrote:
> On Fri, May 18, 2018 at 06:44:04PM +0200, Cédric Le Goater wrote:
>> This proposal moves all the related IRQ routines of the sPAPR machine
>> behind a class interface to prepare for future changes in the IRQ
>> controller model. First of which is a reorganization of the IRQ number
>> space layout and a second, coming later, will be to integrate the
>> support for the new POWER9 XIVE IRQ controller.
>>
>> The new interface defines a set of fixed IRQ number ranges, for each
>> IRQ type, in which devices allocate the IRQ numbers they need
>> depending on a unique device index. Here is the layout :
>>
>> SPAPR_IRQ_IPI0x0/*  1 IRQ per CPU  */
>> SPAPR_IRQ_EPOW   0x1000 /*  1 IRQ per device   */
>> SPAPR_IRQ_HOTPLUG0x1001 /*  1 IRQ per device   */
>> SPAPR_IRQ_VIO0x1100 /*  1 IRQ per device   */
>> SPAPR_IRQ_PCI_LSI0x1200 /*  4 IRQs per device  */
>> SPAPR_IRQ_PCI_MSI0x1400 /* 1K IRQs per device  */
>>
>> The IPI range is reserved for future use when XIVE support
>> comes in.
>>
>> The routines of this interface encompass the previous needs and the
>> new ones and seem complex but the provided IRQ backend should
>> implement what we have today without any functional changes.
>>
>> Each device model is modified to take the new interface into account
>> using the IRQ range/type definitions and a device index. Apart from
>> the VIO devices, lacking an id, the changes are relatively simple.
> 
> I find your use of "back end" vs. "front end" in this patch and the
> next kind of confusing.

This is the front end, the interface used by the machine and devices:

  int spapr_irq_assign(sPAPRMachineState *spapr, uint32_t range, uint32_t irq,
   Error **errp);
  int spapr_irq_alloc(sPAPRMachineState *spapr, uint32_t range, uint32_t index,
Error **errp);
  int spapr_irq_alloc_block(sPAPRMachineState *spapr, uint32_t range,
  uint32_t index, int num, bool align, Error **errp);
  void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num, Error **errp);
  qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq);

and the backend, which can be different depending on the machine level, 
old vs. new layout, or depending on the interrupt controller.

  typedef struct sPAPRIrq {
uint32_tnr_irqs;
const sPAPRPIrqRange *ranges;

void (*init)(sPAPRMachineState *spapr, Error **errp);
int (*assign)(sPAPRMachineState *spapr, uint32_t range, uint32_t irq,
  Error **errp);
int (*alloc)(sPAPRMachineState *spapr, uint32_t range, uint32_t index,
 Error **errp);
int (*alloc_block)(sPAPRMachineState *spapr, uint32_t range,
   uint32_t index, int num, bool align, Error **errp);
void (*free)(sPAPRMachineState *spapr, int irq, int num, Error **errp);
qemu_irq (*qirq)(sPAPRMachineState *spapr, int irq);
void (*print_info)(sPAPRMachineState *spapr, Monitor *mon);
} sPAPRIrq;


>> Signed-off-by: Cédric Le Goater 
>> ---
>>  include/hw/ppc/spapr.h |  10 +-
>>  include/hw/ppc/spapr_irq.h |  46 +
>>  hw/ppc/spapr.c | 177 +-
>>  hw/ppc/spapr_events.c  |   7 +-
>>  hw/ppc/spapr_irq.c | 233 
>> +
>>  hw/ppc/spapr_pci.c |  21 +++-
>>  hw/ppc/spapr_vio.c |   5 +-
>>  hw/ppc/Makefile.objs   |   2 +-
>>  8 files changed, 308 insertions(+), 193 deletions(-)
>>  create mode 100644 include/hw/ppc/spapr_irq.h
>>  create mode 100644 hw/ppc/spapr_irq.c
>>
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 2cfdfdd67eaf..4eb212b16a51 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -3,10 +3,10 @@
>>  
>>  #include "sysemu/dma.h"
>>  #include "hw/boards.h"
>> -#include "hw/ppc/xics.h"
>>  #include "hw/ppc/spapr_drc.h"
>>  #include "hw/mem/pc-dimm.h"
>>  #include "hw/ppc/spapr_ovec.h"
>> +#include "hw/ppc/spapr_irq.h"
>>  
>>  struct VIOsPAPRBus;
>>  struct sPAPRPHBState;
>> @@ -104,6 +104,7 @@ struct sPAPRMachineClass {
>>unsigned n_dma, uint32_t *liobns, Error **errp);
>>  sPAPRResizeHPT resize_hpt_default;
>>  sPAPRCapabilities default_caps;
>> +sPAPRIrq *irq;
>>  };
>>  
>>  /**
>> @@ -773,13 +774,6 @@ int spapr_get_vcpu_id(PowerPCCPU *cpu);
>>  void spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp);
>>  PowerPCCPU *spapr_find_cpu(int vcpu_id);
>>  
>> -int spapr_irq_alloc(sPAPRMachineState *spapr, bool lsi, Error **errp);
>> -int spapr_irq_alloc_block(sPAPRMachineState *spapr, int num, bool lsi,
>> -  bool align, Error **errp);
>> -void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num);
>> -qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq);
>> -
>> -
>>  int spapr_caps_pre_load(void *opaque);
>>  

[Qemu-devel] [PATCH] nvme: Reset s->nr_queues upon open failure

2018-06-13 Thread Fam Zheng
It is wrong to leave this field as 1, as nvme_close() called in the
error handling code in nvme_file_open() will use it and try to free
s->queues again.

Clear the fields to avoid double-free.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Fam Zheng 
---
 block/nvme.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/nvme.c b/block/nvme.c
index 6f71122bf5..7bdeb0ffce 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -666,6 +666,8 @@ fail_queue:
 nvme_free_queue_pair(bs, s->queues[0]);
 fail:
 g_free(s->queues);
+s->queues = NULL;
+s->nr_queues = 0;
 if (s->regs) {
 qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, NVME_BAR_SIZE);
 }
-- 
2.17.0




[Qemu-devel] [PATCH] nvme: Support image creation

2018-06-13 Thread Fam Zheng
Similar to the host_device's implementation, we check the requested
length against the namespace size.

Truncation is necessary to make qcow2 creation work.

Signed-off-by: Fam Zheng 
---
 block/nvme.c | 72 
 1 file changed, 72 insertions(+)

diff --git a/block/nvme.c b/block/nvme.c
index 6f71122bf5..ec3d18e790 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -21,6 +21,7 @@
 #include "qemu/option.h"
 #include "qemu/vfio-helpers.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "trace.h"
 
 #include "block/nvme.h"
@@ -1154,6 +1155,73 @@ static void nvme_unregister_buf(BlockDriverState *bs, 
void *host)
 qemu_vfio_dma_unmap(s->vfio, host);
 }
 
+static QemuOptsList nvme_create_opts = {
+.name = "nvme-create-opts",
+.head = QTAILQ_HEAD_INITIALIZER(nvme_create_opts.head),
+.desc = {
+{
+.name = BLOCK_OPT_SIZE,
+.type = QEMU_OPT_SIZE,
+.help = "Virtual disk size"
+},
+{ /* end of list */ }
+}
+};
+
+static int coroutine_fn nvme_co_create_opts(const char *filename, QemuOpts 
*opts,
+Error **errp)
+{
+int ret = 0;
+BlockDriverState *bs = NULL;
+int64_t size;
+
+if (strncmp(filename, "nvme://", strlen("nvme://"))) {
+error_setg(errp, "Invalid filename (must start with \"nvme://\")");
+ret = -EINVAL;
+goto out;
+}
+
+bs = bdrv_open(filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+if (!bs) {
+ret = -EINVAL;
+goto out;
+}
+
+size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+
+if (size < 0 || bdrv_getlength(bs) < size) {
+error_setg(errp, "Invalid image size");
+ret = -EINVAL;
+}
+
+out:
+bdrv_unref(bs);
+/* Hold breath for a little while before letting image format creation run.
+ * The problem is when testing with Intel P3700, the controller doesn't
+ * like the immediate open after close, as a result, nvme_init() will fail.
+ * This works around that.
+ **/
+g_usleep(200);
+return ret;
+}
+
+static int nvme_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
+{
+if (prealloc != PREALLOC_MODE_OFF) {
+error_setg(errp, "Preallocation mode '%s' unsupported",
+   PreallocMode_str(prealloc));
+return -ENOTSUP;
+}
+
+if (offset > nvme_getlength(bs)) {
+error_setg(errp, "Cannot grow device files");
+return -EINVAL;
+}
+
+return 0;
+}
+
 static BlockDriver bdrv_nvme = {
 .format_name  = "nvme",
 .protocol_name= "nvme",
@@ -1180,6 +1248,10 @@ static BlockDriver bdrv_nvme = {
 
 .bdrv_register_buf= nvme_register_buf,
 .bdrv_unregister_buf  = nvme_unregister_buf,
+
+.create_opts  = &nvme_create_opts,
+.bdrv_co_create_opts  = nvme_co_create_opts,
+.bdrv_truncate= nvme_truncate,
 };
 
 static void bdrv_nvme_init(void)
-- 
2.17.0




Re: [Qemu-devel] [PATCH v4 1/2] qemu-error: introduce {error|warn}_report_once

2018-06-13 Thread Markus Armbruster
Cornelia Huck  writes:

> On Wed, 30 May 2018 11:30:45 +0800
> Peter Xu  wrote:
>
>> On Tue, May 29, 2018 at 11:30:00AM +0200, Cornelia Huck wrote:
>> > On Thu, 24 May 2018 12:44:53 +0800
>> > Peter Xu  wrote:
>> >   
>> > > There are many error_report()s that can be used in frequently called
>> > > functions, especially on IO paths.  That can be unideal in that
>> > > malicious guest can try to trigger the error tons of time which might
>> > > use up the log space on the host (e.g., libvirt can capture the stderr
>> > > of QEMU and put it persistently onto disk).  In VT-d emulation code, we
>> > > have trace_vtd_error() tracer.  AFAIU all those places can be replaced
>> > > by something like error_report() but trace points are mostly used to
>> > > avoid the DDOS attack that mentioned above.  However using trace points
>> > > mean that errors are not dumped if trace not enabled.
>> > > 
>> > > It's not a big deal in most modern server managements since we have
>> > > things like logrotate to maintain the logs and make sure the quota is
>> > > expected.  However it'll still be nice that we just provide another way
>> > > to restrict message generations.  In most cases, this kind of
>> > > error_report()s will only provide valid information on the first message
>> > > sent, and all the rest of similar messages will be mostly talking about
>> > > the same thing.  This patch introduces *_report_once() helpers to allow
>> > > a message to be dumped only once during one QEMU process's life cycle.
>> > > It will make sure: (1) it's on by deffault, so we can even get something
>> > > without turning the trace on and reproducing, and (2) it won't be
>> > > affected by DDOS attack.  
>> > 
>> > This is good for something (sub-)system wide, where it is enough to
>> > alert the user once; but we may want to print something e.g. once per
>> > the device where it happens (see v3 of "vfio-ccw: add force unlimited
>> > prefetch property" for an example).  
>> 
>> I'm glad that we start to have more users of it, no matter which
>> implementation we'll choose.  At least it means it makes some sense to
>> have such a thing.
>> 
>> For me this patch works nicely enough.  Of course there can be
>> per-device errors, but AFAICT mostly we don't have any error at
>> all... and my debugging experience is that when multiple errors happen
>> on different devices simultaneously, it's very possible that they're
>> caused by the same problem (again, errors are rare after all), or, the
>> rest of the problems are caused by the first error only (so the first
>> error might cause a collapse of the rest).  That's why I wanted to
>> always debug with the first error I encounter, because that's mostly
>> always the root cause. In that sense, current patch works nicely for
>> me (note that we can have device ID embedded in the error message with
>> this one).
>
> I think we have slightly different use cases here. In your case,
> something (an error) happened and you don't really care about any
> subsequent messages. In the vfio-ccw case, we want to log when a guest
> tries to do things on a certain device, so we can possibly throw a
> magic switch for that device. We still want messages after that so we
> can catch further devices for which we may want to throw the magic
> switch. (Similar for the other message, we want to give a hint why a
> certain device may not work as expected.)
>
>> 
>> At the same time, I think this patch should be easier to use of course
>> - no extra variables to define, and very self contained. So I would
>> slightly prefer current patch.
>> 
>> However I'm also fine with the approach proposed in the vfio-ccw patch
>> too.  Though if so I would possibly drop the 2nd patch too since if
>> with the vfio-ccw patch I'll need to introduce one bool for every
>> trace_vtd_err() place... then I'd not bother with it any more but
>> instead live with that trace_*(). ;) Or I can define one per-IOMMU
>> error boolean and pass it in for each of the error_report_once(), but
>> it seems a bit awkward too.
>
> I think we can have both the fine-grained control and convenience
> macros for those cases where we really just want to print a message
> once.

Yes.  Cornelia, feel free to post a followup patch that satisfies your
needs.

>> 
>> >   
>> > > 
>> > > To implement it, I stole the printk_once() macro from Linux.  
>> > 
>> > Would something akin to printk_ratelimited() also make sense to avoid
>> > log flooding?  
>> 
>> Yes it will.  IMHO we can have that too as follow up if we want, and
>> it does not conflict with this print_once().  I'd say currently this
>> error_report_once() is good enough for me, especially lightweighted.
>> I suspect we'll need more lines to implement a ratelimited version.
>
> Yes, and I agree that wants to be a separate patch should we find a use
> case for it.

Yes.



Re: [Qemu-devel] [PATCH v4 1/2] qemu-error: introduce {error|warn}_report_once

2018-06-13 Thread Markus Armbruster
Cornelia Huck  writes:

> On Wed, 30 May 2018 14:39:55 +0800
> Peter Xu  wrote:
>
>> On Wed, May 30, 2018 at 07:47:32AM +0300, Michael S. Tsirkin wrote:
>> > On Thu, May 24, 2018 at 12:44:53PM +0800, Peter Xu wrote:  
>> > > There are many error_report()s that can be used in frequently called
>> > > functions, especially on IO paths.  That can be unideal in that
>> > > malicious guest can try to trigger the error tons of time which might
>> > > use up the log space on the host (e.g., libvirt can capture the stderr
>> > > of QEMU and put it persistently onto disk).  
>> > 
>> > I think the problem is real enough but I think the API
>> > isn't great as it stresses the mechanism. Which fundamentally does
>> > not matter - we can print once or 10 times, or whatever.
>> > 
>> > What happens here is a guest bug as opposed to hypervisor
>> > bug. So I think a better name would be guest_error.  
>> 
>> For me error_report_once() is okay since after all it's only a way to
>> dump something for the hypervisor management software (or the person
>> who manages the QEMU instance), and I don't have a strong opinion to
>> introduce a new guest_error() API.
>
> If we go with that suggestion, guest_{error,warn} should also prefix
> the message with "Guest:" or so. Otherwise, it does not offer that much
> more benefit.
>
> [And I think it should be a wrapper around the report_once
> infrastructure.]

I agree.  Keep error_report_once() as low-level function (okay to stress
mechanism there), then wrap whatever higher level functions we find
useful around them, in followup patches.

[...]



Re: [Qemu-devel] [PATCH v4 1/2] qemu-error: introduce {error|warn}_report_once

2018-06-13 Thread Markus Armbruster
Peter Xu  writes:

> There are many error_report()s that can be used in frequently called
> functions, especially on IO paths.  That can be unideal in that
> malicious guest can try to trigger the error tons of time which might
> use up the log space on the host (e.g., libvirt can capture the stderr
> of QEMU and put it persistently onto disk).  In VT-d emulation code, we
> have trace_vtd_error() tracer.  AFAIU all those places can be replaced
> by something like error_report() but trace points are mostly used to
> avoid the DDOS attack that mentioned above.  However using trace points
> mean that errors are not dumped if trace not enabled.
>
> It's not a big deal in most modern server managements since we have
> things like logrotate to maintain the logs and make sure the quota is
> expected.  However it'll still be nice that we just provide another way
> to restrict message generations.  In most cases, this kind of
> error_report()s will only provide valid information on the first message
> sent, and all the rest of similar messages will be mostly talking about
> the same thing.  This patch introduces *_report_once() helpers to allow
> a message to be dumped only once during one QEMU process's life cycle.
> It will make sure: (1) it's on by deffault, so we can even get something

default

> without turning the trace on and reproducing, and (2) it won't be
> affected by DDOS attack.
>
> To implement it, I stole the printk_once() macro from Linux.
>
> CC: Eric Blake 
> CC: Markus Armbruster 
> Signed-off-by: Peter Xu 
> ---
>  include/qemu/error-report.h | 32 
>  1 file changed, 32 insertions(+)
>
> diff --git a/include/qemu/error-report.h b/include/qemu/error-report.h
> index e1c8ae1a52..c7ec54cb97 100644
> --- a/include/qemu/error-report.h
> +++ b/include/qemu/error-report.h
> @@ -44,6 +44,38 @@ void error_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
>  void warn_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
>  void info_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
>  
> +/*
> + * Similar to error_report(), but it only prints the message once.  It
> + * returns true when it prints the first time, otherwise false.

I like to start function contracts with a single line stating the
function's purpose, and I prefer imperative mood, like this:

* Similar to error_report(), but it only prints the message once.
* Return true when it prints, false otherwise.

> + */
> +#define error_report_once(fmt, ...) \
> +({  \
> +static bool print_once_;   \
> +bool ret_print_once_ = !print_once_;  \
> +\
> +if (!print_once_) {\
> +print_once_ = true;\
> +error_report(fmt, ##__VA_ARGS__);   \
> +}   \
> +unlikely(ret_print_once_); \
> +})

Please align the backslashes, say with emacs command c-backslash-region,
bound to C-c C-\.

> +
> +/*
> + * Similar to warn_report(), but it only prints the message once.  It
> + * returns true when it prints the first time, otherwise false.
> + */
> +#define warn_report_once(fmt, ...)  \
> +({  \
> +static bool print_once_;   \
> +bool ret_print_once_ = !print_once_;  \
> +\
> +if (!print_once_) {\
> +print_once_ = true;\
> +warn_report(fmt, ##__VA_ARGS__);   \
> +}   \
> +unlikely(ret_print_once_); \
> +})

Likewise.

> +
>  const char *error_get_progname(void);
>  extern bool enable_timestamp_msg;

With these nits addressed:
Reviewed-by: Markus Armbruster 

I can touch them up when I apply.



Re: [Qemu-devel] [PATCH v4 2/2] intel-iommu: start to use error_report_once

2018-06-13 Thread Markus Armbruster
Peter Xu  writes:

> Replace existing trace_vtd_err() with error_report_once() then stderr
> will capture something if any of the error happens, meanwhile we don't
> suffer from any DDOS.  Then remove the trace point.  While at it,
> provide more information where proper (now we can pass parameters into
> the report function).
>
> Reviewed-by: Philippe Mathieu-Daudé 
> Signed-off-by: Peter Xu 
> ---
>  hw/i386/intel_iommu.c | 59 ---
>  hw/i386/trace-events  |  1 -
>  2 files changed, 33 insertions(+), 27 deletions(-)

Michael, would you give your Reviewed-by or Acked-by?  I'd take the
series through my tree then.

[...]



Re: [Qemu-devel] [PATCH] nvme: Support image creation

2018-06-13 Thread Kevin Wolf
Am 13.06.2018 um 09:46 hat Fam Zheng geschrieben:
> Similar to the host_device's implementation, we check the requested
> length against the namespace size.
> 
> Truncation is necessary to make qcow2 creation work.
> 
> Signed-off-by: Fam Zheng 

> +static int coroutine_fn nvme_co_create_opts(const char *filename, QemuOpts 
> *opts,
> +Error **errp)
> +{
> +int ret = 0;
> +BlockDriverState *bs = NULL;
> +int64_t size;
> +
> +if (strncmp(filename, "nvme://", strlen("nvme://"))) {
> +error_setg(errp, "Invalid filename (must start with \"nvme://\")");
> +ret = -EINVAL;
> +goto out;
> +}
> +
> +bs = bdrv_open(filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, 
> errp);
> +if (!bs) {
> +ret = -EINVAL;
> +goto out;
> +}
> +
> +size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
> +
> +if (size < 0 || bdrv_getlength(bs) < size) {
> +error_setg(errp, "Invalid image size");
> +ret = -EINVAL;
> +}
> +
> +out:
> +bdrv_unref(bs);
> +/* Hold breath for a little while before letting image format creation 
> run.
> + * The problem is when testing with Intel P3700, the controller doesn't
> + * like the immediate open after close, as a result, nvme_init() will 
> fail.
> + * This works around that.
> + **/
> +g_usleep(200);

This suggests that nvme_init() is buggy.

If we need to sleep here (for two whole seconds?!), I'm sure there are
other cases that would have to sleep as well. So even if we can't find a
solution other than sleeping - which feels horribly wrong - the sleep
should probably be in nvme_init() rather than here.

What kind of error are you running into without the sleep?

Kevin



Re: [Qemu-devel] [PATCH 1/7] spapr: Clean up cpu realize/unrealize paths

2018-06-13 Thread Cédric Le Goater
On 06/13/2018 08:57 AM, David Gibson wrote:
> spapr_cpu_init() and spapr_cpu_destroy() are only called from the spapr
> cpu core realize/unrealize paths, and really can only be called from there.
> 
> Those are all short functions, so fold the pairs together for simplicity.
> While we're there rename some functions and change some parameter types
> for brevity and clarity.
> 
> Signed-off-by: David Gibson 

Reviewed-by: Cédric Le Goater 

Still a call to spapr_cpu_reset(cpu). We should try to get rid of it
one day.
 
Thanks,

C.

> ---
>  hw/ppc/spapr_cpu_core.c | 69 +++--
>  1 file changed, 25 insertions(+), 44 deletions(-)
> 
> diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> index f3e9b879b2..7fdb3b6667 100644
> --- a/hw/ppc/spapr_cpu_core.c
> +++ b/hw/ppc/spapr_cpu_core.c
> @@ -83,26 +83,6 @@ void spapr_cpu_set_entry_state(PowerPCCPU *cpu, 
> target_ulong nip, target_ulong r
>  ppc_store_lpcr(cpu, env->spr[SPR_LPCR] | pcc->lpcr_pm);
>  }
>  
> -static void spapr_cpu_destroy(PowerPCCPU *cpu)
> -{
> -qemu_unregister_reset(spapr_cpu_reset, cpu);
> -}
> -
> -static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
> -   Error **errp)
> -{
> -CPUPPCState *env = &cpu->env;
> -
> -/* Set time-base frequency to 512 MHz */
> -cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
> -
> -cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
> -kvmppc_set_papr(cpu);
> -
> -qemu_register_reset(spapr_cpu_reset, cpu);
> -spapr_cpu_reset(cpu);
> -}
> -
>  /*
>   * Return the sPAPR CPU core type for @model which essentially is the CPU
>   * model specified with -cpu cmdline option.
> @@ -122,44 +102,47 @@ const char *spapr_get_cpu_core_type(const char 
> *cpu_type)
>  return object_class_get_name(oc);
>  }
>  
> -static void spapr_cpu_core_unrealizefn(DeviceState *dev, Error **errp)
> +static void spapr_unrealize_vcpu(PowerPCCPU *cpu)
> +{
> +qemu_unregister_reset(spapr_cpu_reset, cpu);
> +object_unparent(cpu->intc);
> +cpu_remove_sync(CPU(cpu));
> +object_unparent(OBJECT(cpu));
> +}
> +
> +static void spapr_cpu_core_unrealize(DeviceState *dev, Error **errp)
>  {
>  sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
>  CPUCore *cc = CPU_CORE(dev);
>  int i;
>  
>  for (i = 0; i < cc->nr_threads; i++) {
> -Object *obj = OBJECT(sc->threads[i]);
> -DeviceState *dev = DEVICE(obj);
> -CPUState *cs = CPU(dev);
> -PowerPCCPU *cpu = POWERPC_CPU(cs);
> -
> -spapr_cpu_destroy(cpu);
> -object_unparent(cpu->intc);
> -cpu_remove_sync(cs);
> -object_unparent(obj);
> +spapr_unrealize_vcpu(sc->threads[i]);
>  }
>  g_free(sc->threads);
>  }
>  
> -static void spapr_cpu_core_realize_child(Object *child,
> - sPAPRMachineState *spapr, Error 
> **errp)
> +static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> +   Error **errp)
>  {
> +CPUPPCState *env = &cpu->env;
>  Error *local_err = NULL;
> -CPUState *cs = CPU(child);
> -PowerPCCPU *cpu = POWERPC_CPU(cs);
>  
> -object_property_set_bool(child, true, "realized", &local_err);
> +object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
>  if (local_err) {
>  goto error;
>  }
>  
> -spapr_cpu_init(spapr, cpu, &local_err);
> -if (local_err) {
> -goto error;
> -}
> +/* Set time-base frequency to 512 MHz */
> +cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
> +
> +cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
> +kvmppc_set_papr(cpu);
>  
> -cpu->intc = icp_create(child, spapr->icp_type, XICS_FABRIC(spapr),
> +qemu_register_reset(spapr_cpu_reset, cpu);
> +spapr_cpu_reset(cpu);
> +
> +cpu->intc = icp_create(OBJECT(cpu), spapr->icp_type, XICS_FABRIC(spapr),
> &local_err);
>  if (local_err) {
>  goto error;
> @@ -220,9 +203,7 @@ static void spapr_cpu_core_realize(DeviceState *dev, 
> Error **errp)
>  }
>  
>  for (j = 0; j < cc->nr_threads; j++) {
> -obj = OBJECT(sc->threads[j]);
> -
> -spapr_cpu_core_realize_child(obj, spapr, &local_err);
> +spapr_realize_vcpu(sc->threads[j], spapr, &local_err);
>  if (local_err) {
>  goto err;
>  }
> @@ -249,7 +230,7 @@ static void spapr_cpu_core_class_init(ObjectClass *oc, 
> void *data)
>  sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_CLASS(oc);
>  
>  dc->realize = spapr_cpu_core_realize;
> -dc->unrealize = spapr_cpu_core_unrealizefn;
> +dc->unrealize = spapr_cpu_core_unrealize;
>  dc->props = spapr_cpu_core_properties;
>  scc->cpu_type = data;
>  }
> 




Re: [Qemu-devel] [PATCH 2/7] pnv: Add missing error check during cpu realize()

2018-06-13 Thread Cédric Le Goater
On 06/13/2018 08:57 AM, David Gibson wrote:
> In pnv_core_realize() we call two functions with an Error * parameter in
> succession, which means if they both cause errors we'll lose the first one.
> Add an extra test/escape to fix this.

I tend now to pass just NULL or &error_abort to object_property_add_child() 
and object_property_add_const_link(). These calls should just not fail.

Reviewed-by: Cédric Le Goater 

Thanks,

C. 
> 
> Signed-off-by: David Gibson 
> ---
>  hw/ppc/pnv_core.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> index 13ad7d9e04..efb68226bb 100644
> --- a/hw/ppc/pnv_core.c
> +++ b/hw/ppc/pnv_core.c
> @@ -173,6 +173,9 @@ static void pnv_core_realize(DeviceState *dev, Error 
> **errp)
>  
>  snprintf(name, sizeof(name), "thread[%d]", i);
>  object_property_add_child(OBJECT(pc), name, obj, &local_err);
> +if (local_err) {
> +goto err;
> +}
>  object_property_add_alias(obj, "core-pir", OBJECT(pc),
>"pir", &local_err);
>  if (local_err) {
> 




Re: [Qemu-devel] [PATCH] nvme: Reset s->nr_queues upon open failure

2018-06-13 Thread Kevin Wolf
Am 13.06.2018 um 09:45 hat Fam Zheng geschrieben:
> It is wrong to leave this field as 1, as nvme_close() called in the
> error handling code in nvme_file_open() will use it and try to free
> s->queues again.
> 
> Clear the fields to avoid double-free.
> 
> Cc: qemu-sta...@nongnu.org
> Signed-off-by: Fam Zheng 
> ---
>  block/nvme.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/block/nvme.c b/block/nvme.c
> index 6f71122bf5..7bdeb0ffce 100644
> --- a/block/nvme.c
> +++ b/block/nvme.c
> @@ -666,6 +666,8 @@ fail_queue:
>  nvme_free_queue_pair(bs, s->queues[0]);
>  fail:
>  g_free(s->queues);
> +s->queues = NULL;
> +s->nr_queues = 0;
>  if (s->regs) {
>  qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, 
> NVME_BAR_SIZE);
>  }

Hm... Basically all the cleanup is duplicated. It's not only
nvme_free_queue_pair(), but also qemu_vfio_pci_unmap_bar() and
qemu_vfio_close(). Are we sure it's intended to call them twice?

Maybe nvme_init() shouldn't clean up any of this and rely on the
later nvme_close() call to do that?

I also notice that the error handling code in nvme_init() has a
g_free(s->queues) and event_notifier_cleanup(&s->irq_notifier), which
nvme_close() doesn't. Are these leaks in nvme_close()?

Kevin



Re: [Qemu-devel] [PATCH 3/7] pnv_core: Allocate cpu thread objects individually

2018-06-13 Thread Cédric Le Goater
On 06/13/2018 08:57 AM, David Gibson wrote:
> Currently, we allocate space for all the cpu objects within a single core
> in one big block.  This was copied from an older version of the spapr code
> and requires some ugly pointer manipulation to extract the individual
> objects.
> 
> This design was due to a misunderstanding of qemu lifetime conventions and
> has already been changed in spapr (in 94ad93bd "spapr_cpu_core: instantiate
> CPUs separately").
> 
> Make an equivalent change in pnv_core to get rid of the nasty pointer
> arithmetic.
>
> Signed-off-by: David Gibson 

Ah nice cleanup :)

Reviewed-by: Cédric Le Goater 

Thanks,

C.

> ---
>  hw/ppc/pnv.c  |  4 ++--
>  hw/ppc/pnv_core.c | 11 +--
>  include/hw/ppc/pnv_core.h |  2 +-
>  3 files changed, 8 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index 0314881316..0b9508d94d 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -121,9 +121,9 @@ static int get_cpus_node(void *fdt)
>   */
>  static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt)
>  {
> -CPUState *cs = CPU(DEVICE(pc->threads));
> +PowerPCCPU *cpu = pc->threads[0];
> +CPUState *cs = CPU(cpu);
>  DeviceClass *dc = DEVICE_GET_CLASS(cs);
> -PowerPCCPU *cpu = POWERPC_CPU(cs);
>  int smt_threads = CPU_CORE(pc)->nr_threads;
>  CPUPPCState *env = &cpu->env;
>  PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> index efb68226bb..59309e149c 100644
> --- a/hw/ppc/pnv_core.c
> +++ b/hw/ppc/pnv_core.c
> @@ -151,7 +151,6 @@ static void pnv_core_realize(DeviceState *dev, Error 
> **errp)
>  PnvCore *pc = PNV_CORE(OBJECT(dev));
>  CPUCore *cc = CPU_CORE(OBJECT(dev));
>  const char *typename = pnv_core_cpu_typename(pc);
> -size_t size = object_type_get_instance_size(typename);
>  Error *local_err = NULL;
>  void *obj;
>  int i, j;
> @@ -165,11 +164,11 @@ static void pnv_core_realize(DeviceState *dev, Error 
> **errp)
>  return;
>  }
>  
> -pc->threads = g_malloc0(size * cc->nr_threads);
> +pc->threads = g_new(PowerPCCPU *, cc->nr_threads);
>  for (i = 0; i < cc->nr_threads; i++) {
> -obj = pc->threads + i * size;
> +obj = object_new(typename);
>  
> -object_initialize(obj, size, typename);
> +pc->threads[i] = POWERPC_CPU(obj);
>  
>  snprintf(name, sizeof(name), "thread[%d]", i);
>  object_property_add_child(OBJECT(pc), name, obj, &local_err);
> @@ -185,7 +184,7 @@ static void pnv_core_realize(DeviceState *dev, Error 
> **errp)
>  }
>  
>  for (j = 0; j < cc->nr_threads; j++) {
> -obj = pc->threads + j * size;
> +obj = OBJECT(pc->threads[j]);
>  
>  pnv_core_realize_child(obj, XICS_FABRIC(xi), &local_err);
>  if (local_err) {
> @@ -200,7 +199,7 @@ static void pnv_core_realize(DeviceState *dev, Error 
> **errp)
>  
>  err:
>  while (--i >= 0) {
> -obj = pc->threads + i * size;
> +obj = OBJECT(pc->threads[i]);
>  object_unparent(obj);
>  }
>  g_free(pc->threads);
> diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
> index e337af7a3a..447ae761f7 100644
> --- a/include/hw/ppc/pnv_core.h
> +++ b/include/hw/ppc/pnv_core.h
> @@ -34,7 +34,7 @@ typedef struct PnvCore {
>  CPUCore parent_obj;
>  
>  /*< public >*/
> -void *threads;
> +PowerPCCPU **threads;
>  uint32_t pir;
>  
>  MemoryRegion xscom_regs;
> 




[Qemu-devel] [PATCH] s390x/cpumodels: add z14 Model ZR1

2018-06-13 Thread Christian Borntraeger
introduce the new z14 Model ZR1 cpu model. Mostly identical to z14, only
the cpu type differs (3906 vs. 3907)

Signed-off-by: Christian Borntraeger 
---
 target/s390x/cpu_models.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index e10035aaa8..cfdbccf46d 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -79,6 +79,7 @@ static S390CPUDef s390_cpu_defs[] = {
 CPUDEF_INIT(0x2964, 13, 2, 47, 0x0800U, "z13.2", "IBM z13 GA2"),
 CPUDEF_INIT(0x2965, 13, 2, 47, 0x0800U, "z13s", "IBM z13s GA1"),
 CPUDEF_INIT(0x3906, 14, 1, 47, 0x0800U, "z14", "IBM z14 GA1"),
+CPUDEF_INIT(0x3907, 14, 1, 47, 0x0800U, "z14ZR1", "IBM z14 Model ZR1 
GA1"),
 };
 
 #define QEMU_MAX_CPU_TYPE 0x2827
-- 
2.17.0




Re: [Qemu-devel] [PATCH 4/7] pnv: Clean up cpu realize path

2018-06-13 Thread Cédric Le Goater
On 06/13/2018 08:57 AM, David Gibson wrote:
> pnv_cpu_init() is only called from the pnv cpu core realize path, and
> really only can be called from there.  So fold it into its caller, which
> we also rename for brevity.
> 
> Signed-off-by: David Gibson 

I think we should set the default CPU settings (PIR) before creating
the 'intc' object. I have cleanup for that in the pnv patchset. 
Nevertheless, 

Reviewed-by: Cédric Le Goater 

Thanks,

C.

> ---
>  hw/ppc/pnv_core.c | 56 ++-
>  1 file changed, 21 insertions(+), 35 deletions(-)
> 
> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> index 59309e149c..c9648fd1ad 100644
> --- a/hw/ppc/pnv_core.c
> +++ b/hw/ppc/pnv_core.c
> @@ -54,28 +54,6 @@ static void pnv_cpu_reset(void *opaque)
>  env->msr |= MSR_HVB; /* Hypervisor mode */
>  }
>  
> -static void pnv_cpu_init(PowerPCCPU *cpu, Error **errp)
> -{
> -CPUPPCState *env = &cpu->env;
> -int core_pir;
> -int thread_index = 0; /* TODO: TCG supports only one thread */
> -ppc_spr_t *pir = &env->spr_cb[SPR_PIR];
> -
> -core_pir = object_property_get_uint(OBJECT(cpu), "core-pir", 
> &error_abort);
> -
> -/*
> - * The PIR of a thread is the core PIR + the thread index. We will
> - * need to find a way to get the thread index when TCG supports
> - * more than 1. We could use the object name ?
> - */
> -pir->default_value = core_pir + thread_index;
> -
> -/* Set time-base frequency to 512 MHz */
> -cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
> -
> -qemu_register_reset(pnv_cpu_reset, cpu);
> -}
> -
>  /*
>   * These values are read by the PowerNV HW monitors under Linux
>   */
> @@ -121,29 +99,39 @@ static const MemoryRegionOps pnv_core_xscom_ops = {
>  .endianness = DEVICE_BIG_ENDIAN,
>  };
>  
> -static void pnv_core_realize_child(Object *child, XICSFabric *xi, Error 
> **errp)
> +static void pnv_realize_vcpu(PowerPCCPU *cpu, XICSFabric *xi, Error **errp)
>  {
> +CPUPPCState *env = &cpu->env;
> +int core_pir;
> +int thread_index = 0; /* TODO: TCG supports only one thread */
> +ppc_spr_t *pir = &env->spr_cb[SPR_PIR];
>  Error *local_err = NULL;
> -CPUState *cs = CPU(child);
> -PowerPCCPU *cpu = POWERPC_CPU(cs);
>  
> -object_property_set_bool(child, true, "realized", &local_err);
> +object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
>  if (local_err) {
>  error_propagate(errp, local_err);
>  return;
>  }
>  
> -cpu->intc = icp_create(child, TYPE_PNV_ICP, xi, &local_err);
> +cpu->intc = icp_create(OBJECT(cpu), TYPE_PNV_ICP, xi, &local_err);
>  if (local_err) {
>  error_propagate(errp, local_err);
>  return;
>  }
>  
> -pnv_cpu_init(cpu, &local_err);
> -if (local_err) {
> -error_propagate(errp, local_err);
> -return;
> -}
> +core_pir = object_property_get_uint(OBJECT(cpu), "core-pir", 
> &error_abort);
> +
> +/*
> + * The PIR of a thread is the core PIR + the thread index. We will
> + * need to find a way to get the thread index when TCG supports
> + * more than 1. We could use the object name ?
> + */
> +pir->default_value = core_pir + thread_index;
> +
> +/* Set time-base frequency to 512 MHz */
> +cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
> +
> +qemu_register_reset(pnv_cpu_reset, cpu);
>  }
>  
>  static void pnv_core_realize(DeviceState *dev, Error **errp)
> @@ -184,9 +172,7 @@ static void pnv_core_realize(DeviceState *dev, Error 
> **errp)
>  }
>  
>  for (j = 0; j < cc->nr_threads; j++) {
> -obj = OBJECT(pc->threads[j]);
> -
> -pnv_core_realize_child(obj, XICS_FABRIC(xi), &local_err);
> +pnv_realize_vcpu(pc->threads[j], XICS_FABRIC(xi), &local_err);
>  if (local_err) {
>  goto err;
>  }
> 




[Qemu-devel] [PATCH v2 1/2] memfd: fix possible usage of the uninitialized file descriptor

2018-06-13 Thread Dima Stepanov
The qemu_memfd_alloc_check() routine allocates the fd variable on stack.
This variable is initialized inside the qemu_memfd_alloc() function.
There are several cases when *fd will be left uninitialized which can
lead to an unexpected close() in the qemu_memfd_free() call.

Set file descriptor to -1 before calling the qemu_memfd_alloc routine.

Signed-off-by: Dima Stepanov 
---
 util/memfd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/util/memfd.c b/util/memfd.c
index d248a53..6287946 100644
--- a/util/memfd.c
+++ b/util/memfd.c
@@ -187,6 +187,7 @@ bool qemu_memfd_alloc_check(void)
 int fd;
 void *ptr;
 
+fd = -1;
 ptr = qemu_memfd_alloc("test", 4096, 0, &fd, NULL);
 memfd_check = ptr ? MEMFD_OK : MEMFD_KO;
 qemu_memfd_free(ptr, 4096, fd);
-- 
2.7.4




Re: [Qemu-devel] [PATCH 5/7] pnv: Add cpu unrealize path

2018-06-13 Thread Cédric Le Goater
On 06/13/2018 08:57 AM, David Gibson wrote:
> Currently we don't have any unrealize path for pnv cpu cores.  We get away
> with this because we don't yet support cpu hotplug for pnv.
> 
> However, we're going to want it eventually, and in the meantime, it makes
> it non-obvious why there are a bunch of allocations on the realize() path
> that don't have matching frees.
> 
> So, implement the missing unrealize path.
> 
> Signed-off-by: David Gibson 

Reviewed-by: Cédric Le Goater 

Thanks,

C.


> ---
>  hw/ppc/pnv_core.c | 21 +
>  1 file changed, 21 insertions(+)
> 
> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> index c9648fd1ad..c70dbbe056 100644
> --- a/hw/ppc/pnv_core.c
> +++ b/hw/ppc/pnv_core.c
> @@ -192,6 +192,26 @@ err:
>  error_propagate(errp, local_err);
>  }
>  
> +static void pnv_unrealize_vcpu(PowerPCCPU *cpu)
> +{
> +qemu_unregister_reset(pnv_cpu_reset, cpu);
> +object_unparent(cpu->intc);
> +cpu_remove_sync(CPU(cpu));
> +object_unparent(OBJECT(cpu));
> +}
> +
> +static void pnv_core_unrealize(DeviceState *dev, Error **errp)
> +{
> +PnvCore *pc = PNV_CORE(dev);
> +CPUCore *cc = CPU_CORE(dev);
> +int i;
> +
> +for (i = 0; i < cc->nr_threads; i++) {
> +pnv_unrealize_vcpu(pc->threads[i]);
> +}
> +g_free(pc->threads);
> +}
> +
>  static Property pnv_core_properties[] = {
>  DEFINE_PROP_UINT32("pir", PnvCore, pir, 0),
>  DEFINE_PROP_END_OF_LIST(),
> @@ -202,6 +222,7 @@ static void pnv_core_class_init(ObjectClass *oc, void 
> *data)
>  DeviceClass *dc = DEVICE_CLASS(oc);
>  
>  dc->realize = pnv_core_realize;
> +dc->unrealize = pnv_core_unrealize;
>  dc->props = pnv_core_properties;
>  }
>  
> 




Re: [Qemu-devel] [PATCH 3/3] target/ppc: filter out non-zero PCR values when using TCG

2018-06-13 Thread Greg Kurz
On Wed, 13 Jun 2018 10:45:06 +1000
David Gibson  wrote:

> On Tue, Jun 12, 2018 at 07:04:15PM +0200, Greg Kurz wrote:
> > Bits set in the PCR disable features of the processor. TCG currently
> > doesn't implement that, i.e., we always act as if PCR is all zeros.
> > 
> > But it is still possible for the PCR to have a non-null value. This may
> > confuse the guest.
> > 
> > There are three distinct cases:
> > 
> > 1) a powernv guest doing mtspr SPR_PCR
> > 
> > 2) reset of a pseries guest if the max-cpu-compat machine property is set
> > 
> > 3) CAS of a pseries guest
> > 
> > This patch adds a ppc_store_pcr() helper that ensures we cannot put
> > a non-null value in the PCR when using TCG. This helper also has
> > error propagation support, so that each case listed above can be
> > handled appropriately:
> > 
> > 1) since the powernv machine is mostly used for OpenPOWER FW devel,
> >we just print an error and let QEMU continue execution
> > 
> > 2) an error is printed and QEMU exits, ie, same behaviour as when
> >KVM doesn't support the requested compat mode
> > 
> > 3) an error is printed and QEMU reports H_HARDWARE to the guest
> > 
> > Signed-off-by: Greg Kurz   
> 
> I'm not really convinced this is a good idea.  Printing a (non fatal)
> error if the guest attempts to write a non-zero value to the PCR
> should be ok.  However, you're generating a fatal error if the machine
> tries to set the PCR in TCG mode.  That could easily happen using,
> e.g. the cap-htm flag on a TCG guest.  That would take TCG from mostly
> working, to refusing to run at all.
> 

I'm confused... I don't see anything related to HTM in TCG. Also we have
the following in cap_htm_apply():

if (tcg_enabled()) {
error_setg(errp,
   "No Transactional Memory support in TCG, try cap-htm=off");

I'm probably missing something... can you enlighten me ?

> > ---
> >  target/ppc/compat.c  |   26 --
> >  target/ppc/cpu.h |3 +++
> >  target/ppc/misc_helper.c |9 ++---
> >  3 files changed, 33 insertions(+), 5 deletions(-)
> > 
> > diff --git a/target/ppc/compat.c b/target/ppc/compat.c
> > index 807c906f6848..08aa99e6ad47 100644
> > --- a/target/ppc/compat.c
> > +++ b/target/ppc/compat.c
> > @@ -138,8 +138,8 @@ void ppc_set_compat(PowerPCCPU *cpu, uint32_t 
> > compat_pvr, Error **errp)
> >  {
> >  const CompatInfo *compat = compat_by_pvr(compat_pvr);
> >  CPUPPCState *env = &cpu->env;
> > -PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> >  uint64_t pcr;
> > +Error *local_err = NULL;
> >  
> >  if (!compat_pvr) {
> >  pcr = 0;
> > @@ -165,8 +165,30 @@ void ppc_set_compat(PowerPCCPU *cpu, uint32_t 
> > compat_pvr, Error **errp)
> >  }
> >  }
> >  
> > +ppc_store_pcr(env, pcr, &local_err);
> > +if (local_err) {
> > +error_propagate(errp, local_err);
> > +return;
> > +}
> > +
> >  cpu->compat_pvr = compat_pvr;
> > -env->spr[SPR_PCR] = pcr & pcc->pcr_mask;
> > +}
> > +
> > +void ppc_store_pcr(CPUPPCState *env, target_ulong value, Error **errp)
> > +{
> > +PowerPCCPU *cpu = ppc_env_get_cpu(env);
> > +PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > +
> > +/* TODO: this check should go away once we actually put the proper PCR
> > + * checks in the various bits of TCG that should have them.
> > + */
> > +if (!kvm_enabled() && value != 0) {
> > +error_setg(errp, "TCG doesn't support PCR value 0x"TARGET_FMT_lx,
> > +   value);
> > +return;
> > +}
> > +
> > +env->spr[SPR_PCR] = value & pcc->pcr_mask;
> >  }
> >  
> >  typedef struct {
> > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> > index a91f1a8777eb..fdaae34feffb 100644
> > --- a/target/ppc/cpu.h
> > +++ b/target/ppc/cpu.h
> > @@ -1296,6 +1296,9 @@ int ppc_cpu_handle_mmu_fault(CPUState *cpu, vaddr 
> > address, int size, int rw,
> >  #if !defined(CONFIG_USER_ONLY)
> >  void ppc_store_sdr1 (CPUPPCState *env, target_ulong value);
> >  void ppc_store_ptcr(CPUPPCState *env, target_ulong value);
> > +#if defined(TARGET_PPC64)
> > +void ppc_store_pcr(CPUPPCState *env, target_ulong value, Error **errp);
> > +#endif
> >  #endif /* !defined(CONFIG_USER_ONLY) */
> >  void ppc_store_msr (CPUPPCState *env, target_ulong value);
> >  
> > diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
> > index b88493009609..7a9b45a01453 100644
> > --- a/target/ppc/misc_helper.c
> > +++ b/target/ppc/misc_helper.c
> > @@ -21,6 +21,7 @@
> >  #include "exec/exec-all.h"
> >  #include "exec/helper-proto.h"
> >  #include "qemu/error-report.h"
> > +#include "qapi/error.h"
> >  
> >  #include "helper_regs.h"
> >  
> > @@ -102,10 +103,12 @@ void helper_store_ptcr(CPUPPCState *env, target_ulong 
> > val)
> >  
> >  void helper_store_pcr(CPUPPCState *env, target_ulong value)
> >  {
> > -PowerPCCPU *cpu = ppc_env_get_cpu(env);
> > -PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(c

[Qemu-devel] [PATCH v2 2/2] memory: fix possible NULL pointer dereference

2018-06-13 Thread Dima Stepanov
In the memory_region_do_invalidate_mmio_ptr() routine the section
variable is initialized by the memory_region_find() call. The section.mr
field can be set to NULL.

Add the check for NULL before trying to drop a section.

Signed-off-by: Dima Stepanov 
---
 memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/memory.c b/memory.c
index 3212acc..bb45248 100644
--- a/memory.c
+++ b/memory.c
@@ -2712,7 +2712,7 @@ static void memory_region_do_invalidate_mmio_ptr(CPUState 
*cpu,
 /* Reset dirty so this doesn't happen later. */
 cpu_physical_memory_test_and_clear_dirty(offset, size, 1);
 
-if (section.mr != mr) {
+if (section.mr && (section.mr != mr)) {
 /* memory_region_find add a ref on section.mr */
 memory_region_unref(section.mr);
 if (MMIO_INTERFACE(section.mr->owner)) {
-- 
2.7.4




Re: [Qemu-devel] [PATCH v4 1/1] s390x/ipl: Try to detect Linux vs non Linux for initial IPL PSW

2018-06-13 Thread Cornelia Huck
On Tue, 12 Jun 2018 14:59:33 +0200
Christian Borntraeger  wrote:

> Right now the IPL device always starts from address 0x1 (the usual
> Linux entry point). To run other guests (e.g. test programs) it is
> useful to use the IPL PSW from address 0. We can use the Linux magic
> at 0x10008 to decide.
> 
> Signed-off-by: Christian Borntraeger 
> ---
> v3->v4:
>   - iplpsw-> ipl_psw
>   - move check for load failures into the non-elf case
>   - change comment about ipl psw
> v2->v3:
>   - check for iplpsw to avoid assert on file errors
>   - use 4 bytes at 4 instead of 8 bytes at 0
> v1->v2:
>   - use LINUX_MAGIC_ADDR define
>   - use assert for valid iplpsw pointer
>   - add endianness conversion
>  hw/s390x/ipl.c | 27 ++-
>  1 file changed, 22 insertions(+), 5 deletions(-)

Thanks, applied.



[Qemu-devel] [PATCH v2 0/2] misc fixes found by static analyzer

2018-06-13 Thread Dima Stepanov
During the development process we used scan-build as static analyzer to
check the changes. Some issues were found; the patch set below
resolves them.

Changes v2:
 - remove one patch, since it was resolved by: 7eb24009

Dima Stepanov (2):
  memfd: fix possible usage of the uninitialized file descriptor
  memory: fix possible NULL pointer dereference

 memory.c | 2 +-
 util/memfd.c | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

-- 
2.7.4




Re: [Qemu-devel] [RFC v2 1/3] pci_expander_bridge: add type TYPE_PXB_PCIE_HOST

2018-06-13 Thread Zihan Yang
Michael S. Tsirkin  于2018年6月12日周二 下午9:43写道:
>
> On Tue, Jun 12, 2018 at 05:13:22PM +0800, Zihan Yang wrote:
> > The inner host bridge created by pxb-pcie is TYPE_PXB_PCI_HOST by default,
> > add a new type TYPE_PXB_PCIE_HOST to better utilize the ECAM of PCIe
> >
> > Signed-off-by: Zihan Yang 
>
> I have a concern that there are lots of new properties
> added here, I'm not sure how are upper layers supposed to
> manage them all.
>
> E.g. bus_nr supplied in several places, domain_nr for which
> it's not clear how it is supposed to be allocated, etc.

Indeed they seem to double the properties, but the pxb host is
an internal structure of pxb-pcie device, created in pxb-pcie's
realization procedure, and acpi-build queries host bridges instead
of pxb-pcie devices. This means that users can not directly specify
the property of pxb host bridge, but must 'inherit' from pxb-pcie
devices. I had thought about changing the acpi-build process,
but that would require more modifications.

As for the properties, bus_nr means the start bus number
of this host bridge. It is used when pxb-pcie is in pci domain 0
with q35 host to avoid bus number conflicts. When it is placed
in a separate pci domain, it is not used and should be 0.

max_bus means how many buses the user desires, EACH bus in
PCIe requires 1MB configuration space, thus specifying it could
reduce the reserved memory in MMCFG as suggested by Marcel.
Typically, the user can specify

-device pxb-pcie,id=br1,bus="pcie.0",sep_domain=on,domain_nr=1,max_bus=130

this will place the buses under this pxb host bridge in pci domain
1, and reserve (130 + 1) = 131 buses for it. The start bus number
is always 0 currently for simplicity.

> Can the management interface be simplified?
> Ideally we wouldn't have to teach libvirt new tricks,
> just generalize pxb support slightly.

We can delete 'sep_domain' property, I just find 'domain_nr'
already indicates domain number. But domain_nr and
max_bus seem unremovable, although they look 'redundant'
because they appear twice.

I'm not familiar with libvirt, but from the perspective of user,
only 2 properties are added(domain_nr and max_bus, if we
delete sep_domain), though the internal structure actually has
changed.



[Qemu-devel] [PATCH 3/3] mos6522: expose mos6522_update_irq() through MOS6522DeviceClass

2018-06-13 Thread Mark Cave-Ayland
In the case where we have an interrupt generated externally from inputs to
bits 1 and 2 of port A and/or port B, it is necessary to expose
mos6522_update_irq() so it can be called by the interrupt source.

Signed-off-by: Mark Cave-Ayland 
---
 hw/misc/mos6522.c | 1 +
 include/hw/misc/mos6522.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/hw/misc/mos6522.c b/hw/misc/mos6522.c
index 8d5b419825..14cff26c61 100644
--- a/hw/misc/mos6522.c
+++ b/hw/misc/mos6522.c
@@ -463,6 +463,7 @@ static void mos6522_class_init(ObjectClass *oc, void *data)
 mdc->set_sr_int = mos6522_set_sr_int;
 mdc->portB_write = mos6522_portB_write;
 mdc->portA_write = mos6522_portA_write;
+mdc->update_irq = mos6522_update_irq;
 mdc->get_timer1_counter_value = mos6522_get_counter_value;
 mdc->get_timer2_counter_value = mos6522_get_counter_value;
 mdc->get_timer1_load_time = mos6522_get_load_time;
diff --git a/include/hw/misc/mos6522.h b/include/hw/misc/mos6522.h
index f52b41920b..03d9f0c059 100644
--- a/include/hw/misc/mos6522.h
+++ b/include/hw/misc/mos6522.h
@@ -134,6 +134,7 @@ typedef struct MOS6522DeviceClass {
 void (*set_sr_int)(MOS6522State *dev);
 void (*portB_write)(MOS6522State *dev);
 void (*portA_write)(MOS6522State *dev);
+void (*update_irq)(MOS6522State *dev);
 /* These are used to influence the CUDA MacOS timebase calibration */
 uint64_t (*get_timer1_counter_value)(MOS6522State *dev, MOS6522Timer *ti);
 uint64_t (*get_timer2_counter_value)(MOS6522State *dev, MOS6522Timer *ti);
-- 
2.11.0




[Qemu-devel] [PATCH 0/3] mos6522: allow IRQs from external port pins

2018-06-13 Thread Mark Cave-Ayland
Whilst testing a conversion of Laurent's q800 patchset over to use mos6522
I discovered some issues which prevented IRQs being generated from inputs to
external port pins.

This is a requirement for the q800 patchset which uses external clocks to
generate periodic interrupts.

Signed-off-by: Mark Cave-Ayland 


Mark Cave-Ayland (3):
  mos6522: only clear the shift register interrupt upon write
  mos6522: remove additional interrupt flag filter from
mos6522_update_irq()
  mos6522: expose mos6522_update_irq() through MOS6522DeviceClass

 hw/misc/mos6522.c | 5 +++--
 include/hw/misc/mos6522.h | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

-- 
2.11.0




[Qemu-devel] [PATCH 2/3] mos6522: remove additional interrupt flag filter from mos6522_update_irq()

2018-06-13 Thread Mark Cave-Ayland
The datasheet indicates that the interrupt is generated by ANDing the
interrupt flags register (IFR) with the interrupt enable register (IER)
but currently there is an extra filter for the SR and timer interrupts.

Remove this extra filter to allow interrupts to be generated by external
inputs on bits 1 and 2 of ports A and B.

Signed-off-by: Mark Cave-Ayland 
---
 hw/misc/mos6522.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/misc/mos6522.c b/hw/misc/mos6522.c
index ad5041d8c0..8d5b419825 100644
--- a/hw/misc/mos6522.c
+++ b/hw/misc/mos6522.c
@@ -40,7 +40,7 @@ static void mos6522_timer_update(MOS6522State *s, 
MOS6522Timer *ti,
 
 static void mos6522_update_irq(MOS6522State *s)
 {
-if (s->ifr & s->ier & (SR_INT | T1_INT | T2_INT)) {
+if (s->ifr & s->ier) {
 qemu_irq_raise(s->irq);
 } else {
 qemu_irq_lower(s->irq);
-- 
2.11.0




[Qemu-devel] [PATCH 1/3] mos6522: only clear the shift register interrupt upon write

2018-06-13 Thread Mark Cave-Ayland
According to the 6522 datasheet the shift register (SR) interrupt flag is
cleared upon write with no mention of any other interrupt flags.

Signed-off-by: Mark Cave-Ayland 
---
 hw/misc/mos6522.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/misc/mos6522.c b/hw/misc/mos6522.c
index 44eb306cf1..ad5041d8c0 100644
--- a/hw/misc/mos6522.c
+++ b/hw/misc/mos6522.c
@@ -241,7 +241,7 @@ uint64_t mos6522_read(void *opaque, hwaddr addr, unsigned 
size)
 break;
 case VIA_REG_SR:
 val = s->sr;
-s->ifr &= ~(SR_INT | CB1_INT | CB2_INT);
+s->ifr &= ~SR_INT;
 mos6522_update_irq(s);
 break;
 case VIA_REG_ACR:
-- 
2.11.0




Re: [Qemu-devel] [PATCH 1/7] spapr: Clean up cpu realize/unrealize paths

2018-06-13 Thread Greg Kurz
On Wed, 13 Jun 2018 16:57:01 +1000
David Gibson  wrote:

> spapr_cpu_init() and spapr_cpu_destroy() are only called from the spapr
> cpu core realize/unrealize paths, and really can only be called from there.
> 
> Those are all short functions, so fold the pairs together for simplicity.
> While we're there rename some functions and change some parameter types
> for brevity and clarity.
> 
> Signed-off-by: David Gibson 
> ---

Reviewed-by: Greg Kurz 

>  hw/ppc/spapr_cpu_core.c | 69 +++--
>  1 file changed, 25 insertions(+), 44 deletions(-)
> 
> diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> index f3e9b879b2..7fdb3b6667 100644
> --- a/hw/ppc/spapr_cpu_core.c
> +++ b/hw/ppc/spapr_cpu_core.c
> @@ -83,26 +83,6 @@ void spapr_cpu_set_entry_state(PowerPCCPU *cpu, 
> target_ulong nip, target_ulong r
>  ppc_store_lpcr(cpu, env->spr[SPR_LPCR] | pcc->lpcr_pm);
>  }
>  
> -static void spapr_cpu_destroy(PowerPCCPU *cpu)
> -{
> -qemu_unregister_reset(spapr_cpu_reset, cpu);
> -}
> -
> -static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
> -   Error **errp)
> -{
> -CPUPPCState *env = &cpu->env;
> -
> -/* Set time-base frequency to 512 MHz */
> -cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
> -
> -cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
> -kvmppc_set_papr(cpu);
> -
> -qemu_register_reset(spapr_cpu_reset, cpu);
> -spapr_cpu_reset(cpu);
> -}
> -
>  /*
>   * Return the sPAPR CPU core type for @model which essentially is the CPU
>   * model specified with -cpu cmdline option.
> @@ -122,44 +102,47 @@ const char *spapr_get_cpu_core_type(const char 
> *cpu_type)
>  return object_class_get_name(oc);
>  }
>  
> -static void spapr_cpu_core_unrealizefn(DeviceState *dev, Error **errp)
> +static void spapr_unrealize_vcpu(PowerPCCPU *cpu)
> +{
> +qemu_unregister_reset(spapr_cpu_reset, cpu);
> +object_unparent(cpu->intc);
> +cpu_remove_sync(CPU(cpu));
> +object_unparent(OBJECT(cpu));
> +}
> +
> +static void spapr_cpu_core_unrealize(DeviceState *dev, Error **errp)
>  {
>  sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
>  CPUCore *cc = CPU_CORE(dev);
>  int i;
>  
>  for (i = 0; i < cc->nr_threads; i++) {
> -Object *obj = OBJECT(sc->threads[i]);
> -DeviceState *dev = DEVICE(obj);
> -CPUState *cs = CPU(dev);
> -PowerPCCPU *cpu = POWERPC_CPU(cs);
> -
> -spapr_cpu_destroy(cpu);
> -object_unparent(cpu->intc);
> -cpu_remove_sync(cs);
> -object_unparent(obj);
> +spapr_unrealize_vcpu(sc->threads[i]);
>  }
>  g_free(sc->threads);
>  }
>  
> -static void spapr_cpu_core_realize_child(Object *child,
> - sPAPRMachineState *spapr, Error 
> **errp)
> +static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> +   Error **errp)
>  {
> +CPUPPCState *env = &cpu->env;
>  Error *local_err = NULL;
> -CPUState *cs = CPU(child);
> -PowerPCCPU *cpu = POWERPC_CPU(cs);
>  
> -object_property_set_bool(child, true, "realized", &local_err);
> +object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
>  if (local_err) {
>  goto error;
>  }
>  
> -spapr_cpu_init(spapr, cpu, &local_err);
> -if (local_err) {
> -goto error;
> -}
> +/* Set time-base frequency to 512 MHz */
> +cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
> +
> +cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
> +kvmppc_set_papr(cpu);
>  
> -cpu->intc = icp_create(child, spapr->icp_type, XICS_FABRIC(spapr),
> +qemu_register_reset(spapr_cpu_reset, cpu);
> +spapr_cpu_reset(cpu);
> +
> +cpu->intc = icp_create(OBJECT(cpu), spapr->icp_type, XICS_FABRIC(spapr),
> &local_err);
>  if (local_err) {
>  goto error;
> @@ -220,9 +203,7 @@ static void spapr_cpu_core_realize(DeviceState *dev, 
> Error **errp)
>  }
>  
>  for (j = 0; j < cc->nr_threads; j++) {
> -obj = OBJECT(sc->threads[j]);
> -
> -spapr_cpu_core_realize_child(obj, spapr, &local_err);
> +spapr_realize_vcpu(sc->threads[j], spapr, &local_err);
>  if (local_err) {
>  goto err;
>  }
> @@ -249,7 +230,7 @@ static void spapr_cpu_core_class_init(ObjectClass *oc, 
> void *data)
>  sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_CLASS(oc);
>  
>  dc->realize = spapr_cpu_core_realize;
> -dc->unrealize = spapr_cpu_core_unrealizefn;
> +dc->unrealize = spapr_cpu_core_unrealize;
>  dc->props = spapr_cpu_core_properties;
>  scc->cpu_type = data;
>  }




[Qemu-devel] [PATCH v4 1/3] hw/display: add ramfb, a simple boot framebuffer living in guest ram

2018-06-13 Thread Gerd Hoffmann
The boot framebuffer is expected to be configured by the firmware, so it
uses fw_cfg as interface.  Initialization goes as follows:

  (1) Check whether etc/ramfb is present.
  (2) Allocate framebuffer from RAM.
  (3) Fill struct RAMFBCfg, write it to etc/ramfb.

Done.  You can write stuff to the framebuffer now, and it should appear
automagically on the screen.

Note that this isn't very efficient because it does a full display
update on each refresh.  No dirty tracking.  Dirty tracking would have
to be active for the whole ram slot, so that wouldn't be very efficient
either.  For a boot display which is active for a short time only this
isn't a big deal.  As permanent guest display something better should be
used (if possible).

This is the ramfb core code.  Some windup is needed for display devices
which want to have a ramfb boot display.

Signed-off-by: Gerd Hoffmann 
---
 include/hw/display/ramfb.h |  9 +
 hw/display/ramfb.c | 95 ++
 hw/display/Makefile.objs   |  2 +
 3 files changed, 106 insertions(+)
 create mode 100644 include/hw/display/ramfb.h
 create mode 100644 hw/display/ramfb.c

diff --git a/include/hw/display/ramfb.h b/include/hw/display/ramfb.h
new file mode 100644
index 00..a3d4c79942
--- /dev/null
+++ b/include/hw/display/ramfb.h
@@ -0,0 +1,9 @@
+#ifndef RAMFB_H
+#define RAMFB_H
+
+/* ramfb.c */
+typedef struct RAMFBState RAMFBState;
+void ramfb_display_update(QemuConsole *con, RAMFBState *s);
+RAMFBState *ramfb_setup(Error **errp);
+
+#endif /* RAMFB_H */
diff --git a/hw/display/ramfb.c b/hw/display/ramfb.c
new file mode 100644
index 00..6867bce8ae
--- /dev/null
+++ b/hw/display/ramfb.c
@@ -0,0 +1,95 @@
+/*
+ * early boot framebuffer in guest ram
+ * configured using fw_cfg
+ *
+ * Copyright Red Hat, Inc. 2017
+ *
+ * Author:
+ * Gerd Hoffmann 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/loader.h"
+#include "hw/display/ramfb.h"
+#include "ui/console.h"
+#include "sysemu/sysemu.h"
+
+struct QEMU_PACKED RAMFBCfg {
+uint64_t addr;
+uint32_t fourcc;
+uint32_t flags;
+uint32_t width;
+uint32_t height;
+uint32_t stride;
+};
+
+struct RAMFBState {
+DisplaySurface *ds;
+uint32_t width, height;
+struct RAMFBCfg cfg;
+};
+
+static void ramfb_fw_cfg_write(void *dev, off_t offset, size_t len)
+{
+RAMFBState *s = dev;
+void *framebuffer;
+uint32_t stride, fourcc, format;
+hwaddr addr, length;
+
+s->width  = be32_to_cpu(s->cfg.width);
+s->height = be32_to_cpu(s->cfg.height);
+stride= be32_to_cpu(s->cfg.stride);
+fourcc= be32_to_cpu(s->cfg.fourcc);
+addr  = be64_to_cpu(s->cfg.addr);
+length= stride * s->height;
+format= qemu_drm_format_to_pixman(fourcc);
+
+fprintf(stderr, "%s: %dx%d @ 0x%" PRIx64 "\n", __func__,
+s->width, s->height, addr);
+framebuffer = address_space_map(&address_space_memory,
+addr, &length, false,
+MEMTXATTRS_UNSPECIFIED);
+if (!framebuffer || length < stride * s->height) {
+s->width = 0;
+s->height = 0;
+return;
+}
+s->ds = qemu_create_displaysurface_from(s->width, s->height,
+format, stride, framebuffer);
+}
+
+void ramfb_display_update(QemuConsole *con, RAMFBState *s)
+{
+if (!s->width || !s->height) {
+return;
+}
+
+if (s->ds) {
+dpy_gfx_replace_surface(con, s->ds);
+s->ds = NULL;
+}
+
+/* simple full screen update */
+dpy_gfx_update_full(con);
+}
+
+RAMFBState *ramfb_setup(Error **errp)
+{
+FWCfgState *fw_cfg = fw_cfg_find();
+RAMFBState *s;
+
+if (!fw_cfg || !fw_cfg->dma_enabled) {
+error_setg(errp, "ramfb device requires fw_cfg with DMA");
+return NULL;
+}
+
+s = g_new0(RAMFBState, 1);
+
+fw_cfg_add_file_callback(fw_cfg, "etc/ramfb",
+ NULL, ramfb_fw_cfg_write, s,
+ &s->cfg, sizeof(s->cfg), false);
+return s;
+}
diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs
index b5d97ab26d..0af04985d2 100644
--- a/hw/display/Makefile.objs
+++ b/hw/display/Makefile.objs
@@ -1,3 +1,5 @@
+common-obj-y += ramfb.o
+
 common-obj-$(CONFIG_ADS7846) += ads7846.o
 common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o
 common-obj-$(CONFIG_G364FB) += g364fb.o
-- 
2.9.3




[Qemu-devel] [PATCH v4 2/3] hw/display: add standalone ramfb device

2018-06-13 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
---
 include/hw/display/ramfb.h|  3 +++
 hw/arm/sysbus-fdt.c   |  7 +
 hw/arm/virt.c |  2 ++
 hw/display/ramfb-standalone.c | 62 +++
 hw/i386/pc_piix.c |  2 ++
 hw/i386/pc_q35.c  |  2 ++
 hw/display/Makefile.objs  |  1 +
 7 files changed, 79 insertions(+)
 create mode 100644 hw/display/ramfb-standalone.c

diff --git a/include/hw/display/ramfb.h b/include/hw/display/ramfb.h
index a3d4c79942..b33a2c467b 100644
--- a/include/hw/display/ramfb.h
+++ b/include/hw/display/ramfb.h
@@ -6,4 +6,7 @@ typedef struct RAMFBState RAMFBState;
 void ramfb_display_update(QemuConsole *con, RAMFBState *s);
 RAMFBState *ramfb_setup(Error **errp);
 
+/* ramfb-standalone.c */
+#define TYPE_RAMFB_DEVICE "ramfb"
+
 #endif /* RAMFB_H */
diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
index e4c492ea44..277ed872e7 100644
--- a/hw/arm/sysbus-fdt.c
+++ b/hw/arm/sysbus-fdt.c
@@ -36,6 +36,7 @@
 #include "hw/vfio/vfio-platform.h"
 #include "hw/vfio/vfio-calxeda-xgmac.h"
 #include "hw/vfio/vfio-amd-xgbe.h"
+#include "hw/display/ramfb.h"
 #include "hw/arm/fdt.h"
 
 /*
@@ -406,12 +407,18 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, 
void *opaque)
 
 #endif /* CONFIG_LINUX */
 
+static int no_fdt_node(SysBusDevice *sbdev, void *opaque)
+{
+return 0;
+}
+
 /* list of supported dynamic sysbus devices */
 static const NodeCreationPair add_fdt_node_functions[] = {
 #ifdef CONFIG_LINUX
 {TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node},
 {TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node},
 #endif
+{TYPE_RAMFB_DEVICE, no_fdt_node},
 {"", NULL}, /* last element */
 };
 
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f0a4fa004c..98b99cf236 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -36,6 +36,7 @@
 #include "hw/arm/virt.h"
 #include "hw/vfio/vfio-calxeda-xgmac.h"
 #include "hw/vfio/vfio-amd-xgbe.h"
+#include "hw/display/ramfb.h"
 #include "hw/devices.h"
 #include "net/net.h"
 #include "sysemu/device_tree.h"
@@ -1659,6 +1660,7 @@ static void virt_machine_class_init(ObjectClass *oc, void 
*data)
 mc->max_cpus = 255;
 machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_CALXEDA_XGMAC);
 machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
+machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
 mc->block_default_type = IF_VIRTIO;
 mc->no_cdrom = 1;
 mc->pci_allow_0_address = true;
diff --git a/hw/display/ramfb-standalone.c b/hw/display/ramfb-standalone.c
new file mode 100644
index 00..c0d241ba01
--- /dev/null
+++ b/hw/display/ramfb-standalone.c
@@ -0,0 +1,62 @@
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/loader.h"
+#include "hw/isa/isa.h"
+#include "hw/display/ramfb.h"
+#include "ui/console.h"
+#include "sysemu/sysemu.h"
+
+#define RAMFB(obj) OBJECT_CHECK(RAMFBStandaloneState, (obj), TYPE_RAMFB_DEVICE)
+
+typedef struct RAMFBStandaloneState {
+SysBusDevice parent_obj;
+QemuConsole *con;
+RAMFBState *state;
+} RAMFBStandaloneState;
+
+static void display_update_wrapper(void *dev)
+{
+RAMFBStandaloneState *ramfb = RAMFB(dev);
+
+if (0 /* native driver active */) {
+/* non-standalone device would run native display update here */;
+} else {
+ramfb_display_update(ramfb->con, ramfb->state);
+}
+}
+
+static const GraphicHwOps wrapper_ops = {
+.gfx_update = display_update_wrapper,
+};
+
+static void ramfb_realizefn(DeviceState *dev, Error **errp)
+{
+RAMFBStandaloneState *ramfb = RAMFB(dev);
+
+ramfb->con = graphic_console_init(dev, 0, &wrapper_ops, dev);
+ramfb->state = ramfb_setup(errp);
+}
+
+static void ramfb_class_initfn(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+
+set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
+dc->realize = ramfb_realizefn;
+dc->desc = "ram framebuffer standalone device";
+dc->user_creatable = true;
+}
+
+static const TypeInfo ramfb_info = {
+.name  = TYPE_RAMFB_DEVICE,
+.parent= TYPE_SYS_BUS_DEVICE,
+.instance_size = sizeof(RAMFBStandaloneState),
+.class_init= ramfb_class_initfn,
+};
+
+static void ramfb_register_types(void)
+{
+type_register_static(&ramfb_info);
+}
+
+type_init(ramfb_register_types)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 3b87f3cedb..e9b6f064fb 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -28,6 +28,7 @@
 #include "hw/loader.h"
 #include "hw/i386/pc.h"
 #include "hw/i386/apic.h"
+#include "hw/display/ramfb.h"
 #include "hw/smbios/smbios.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_ids.h"
@@ -423,6 +424,7 @@ static void pc_i440fx_machine_options(MachineClass *m)
 m->desc = "Standard PC (i440FX + PIIX, 1996)";
 m->default_machine_opts = "firmware=bios-256k.bin";
 m->default_display = "std";
+machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
 }
 
 static vo

[Qemu-devel] [PATCH v4 3/3] hw/vfio/display: add ramfb support

2018-06-13 Thread Gerd Hoffmann
So we have a boot display when using a vgpu as primary display.

Use vfio-pci-ramfb instead of vfio-pci to enable it.

Signed-off-by: Gerd Hoffmann 
---
 include/hw/vfio/vfio-common.h |  2 ++
 hw/vfio/display.c | 10 ++
 hw/vfio/pci.c | 15 +++
 3 files changed, 27 insertions(+)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index a9036929b2..a58d7e7e77 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -26,6 +26,7 @@
 #include "qemu/queue.h"
 #include "qemu/notify.h"
 #include "ui/console.h"
+#include "hw/display/ramfb.h"
 #ifdef CONFIG_LINUX
 #include 
 #endif
@@ -143,6 +144,7 @@ typedef struct VFIODMABuf {
 
 typedef struct VFIODisplay {
 QemuConsole *con;
+RAMFBState *ramfb;
 struct {
 VFIORegion buffer;
 DisplaySurface *surface;
diff --git a/hw/vfio/display.c b/hw/vfio/display.c
index 59c0e5d1d7..409d5a2e3a 100644
--- a/hw/vfio/display.c
+++ b/hw/vfio/display.c
@@ -124,6 +124,9 @@ static void vfio_display_dmabuf_update(void *opaque)
 
 primary = vfio_display_get_dmabuf(vdev, DRM_PLANE_TYPE_PRIMARY);
 if (primary == NULL) {
+if (dpy->ramfb) {
+ramfb_display_update(dpy->con, dpy->ramfb);
+}
 return;
 }
 
@@ -181,6 +184,8 @@ static int vfio_display_dmabuf_init(VFIOPCIDevice *vdev, 
Error **errp)
 vdev->dpy->con = graphic_console_init(DEVICE(vdev), 0,
   &vfio_display_dmabuf_ops,
   vdev);
+if (strcmp(object_get_typename(OBJECT(vdev)), "vfio-pci-ramfb") == 0)
+vdev->dpy->ramfb = ramfb_setup(errp);
 return 0;
 }
 
@@ -228,6 +233,9 @@ static void vfio_display_region_update(void *opaque)
 return;
 }
 if (!plane.drm_format || !plane.size) {
+if (dpy->ramfb) {
+ramfb_display_update(dpy->con, dpy->ramfb);
+}
 return;
 }
 format = qemu_drm_format_to_pixman(plane.drm_format);
@@ -300,6 +308,8 @@ static int vfio_display_region_init(VFIOPCIDevice *vdev, 
Error **errp)
 vdev->dpy->con = graphic_console_init(DEVICE(vdev), 0,
   &vfio_display_region_ops,
   vdev);
+if (strcmp(object_get_typename(OBJECT(vdev)), "vfio-pci-ramfb") == 0)
+vdev->dpy->ramfb = ramfb_setup(errp);
 return 0;
 }
 
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 18c493b49e..6a2b42a595 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3234,9 +3234,24 @@ static const TypeInfo vfio_pci_dev_info = {
 },
 };
 
+static void vfio_pci_ramfb_dev_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+
+dc->hotpluggable = false;
+}
+
+static const TypeInfo vfio_pci_ramfb_dev_info = {
+.name = "vfio-pci-ramfb",
+.parent = "vfio-pci",
+.instance_size = sizeof(VFIOPCIDevice),
+.class_init = vfio_pci_ramfb_dev_class_init,
+};
+
 static void register_vfio_pci_dev_type(void)
 {
 type_register_static(&vfio_pci_dev_info);
+type_register_static(&vfio_pci_ramfb_dev_info);
 }
 
 type_init(register_vfio_pci_dev_type)
-- 
2.9.3




[Qemu-devel] [PATCH v4 0/3] ramfb: simple boot framebuffer

2018-06-13 Thread Gerd Hoffmann
  Hi,

So, the first ramfb bits should be ready for merge.  This series
includes the ramfb core support bits, the ramfb standalone device
and vfio-pci-ramfb device for vgpu boot display support.

If you want to play with it I recommend getting the bits from

https://www.kraxel.org/cgit/qemu/log/?h=sirius/ramfb

because they come with an updated seabios and a new vgabios rom and an
experimental OVMF build.  Firmware patches are here:

https://www.kraxel.org/cgit/seabios/log/?h=ramfb
https://github.com/kraxel/edk2/commits/ramfb

They should land upstream soon.

cheers,
  Gerd

Gerd Hoffmann (3):
  hw/display: add ramfb, a simple boot framebuffer living in guest ram
  hw/display: add standalone ramfb device
  hw/vfio/display: add ramfb support

 include/hw/display/ramfb.h| 12 ++
 include/hw/vfio/vfio-common.h |  2 +
 hw/arm/sysbus-fdt.c   |  7 
 hw/arm/virt.c |  2 +
 hw/display/ramfb-standalone.c | 62 
 hw/display/ramfb.c| 95 +++
 hw/i386/pc_piix.c |  2 +
 hw/i386/pc_q35.c  |  2 +
 hw/vfio/display.c | 10 +
 hw/vfio/pci.c | 15 +++
 hw/display/Makefile.objs  |  3 ++
 11 files changed, 212 insertions(+)
 create mode 100644 include/hw/display/ramfb.h
 create mode 100644 hw/display/ramfb-standalone.c
 create mode 100644 hw/display/ramfb.c

-- 
2.9.3




Re: [Qemu-devel] [PATCH] tracing.txt: add missing '-' for trace option

2018-06-13 Thread Yaowei Bai
On Tue, Jun 12, 2018 at 11:50:30PM -0400, Emilio G. Cota wrote:
> On Tue, Jun 12, 2018 at 23:15:49 -0400, Yaowei Bai wrote:
> > Signed-off-by: Yaowei Bai 
> > ---
> >  docs/devel/tracing.txt | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/docs/devel/tracing.txt b/docs/devel/tracing.txt
> > index 07abbb3..6ab32cc 100644
> > --- a/docs/devel/tracing.txt
> > +++ b/docs/devel/tracing.txt
> > @@ -18,7 +18,7 @@ for debugging, profiling, and observing execution.
> >  
> >  3. Run the virtual machine to produce a trace file:
> >  
> > -qemu -trace events=/tmp/events ... # your normal QEMU invocation
> > +qemu --trace events=/tmp/events ... # your normal QEMU invocation
> 
> The second dash is not missing; both '-args' and '--args' work.
> 
> '-args' seems to be favoured though; see `qemu -h' or `qemu --h' :-)

Oh, you're right. I just checked qemu-nbd/io utilities. Forget about
this change. Thanks. :-)

> 
>   Emilio





Re: [Qemu-devel] [PATCH 6/7] target/ppc: Replace intc pointer with a general machine_data pointer

2018-06-13 Thread Cédric Le Goater
On 06/13/2018 08:57 AM, David Gibson wrote:
> PowerPCCPU contains an (Object *)intc used to point to the cpu's interrupt
> controller. Or more precisely to the "presentation" component of the
> interrupt controller relevant to this cpu.

yes and that made sense in terms of modeling because you actually have a 
set of wires between the presenter and the cores of a system.

> Really, this field is machine specific.  The machines which use it can
> point it to different types of object depending on their needs, and most
> machines don't use it at all (since they have older style PICs which don't
> have per-cpu presentation components).
> 
> There's also other information that's per-cpu, but platform/machine
> specific.  So replace the intc pointer with a (void *)machine_data which
> can be managed as the machine type likes to conveniently store per cpu
> information.

ah. so you have something else to store in the machine_data.

If you were defining a type, we would have some more checks when 
casting the machine_data field. We could also parent the object
to the CPU. This is minor.


The change should be compatible with the XIVE change which needs
to allocate a different type of presenter. So, sPAPRCPUState and 
PnvCPUState would look like :

typedef struct sPAPRCPUState {
ICPState *icp;
XiveTCTX *tctx;
} sPAPRCPUState;

and the call to icp_create() will move into an operation of the
sPAPR IRQ backend, if that exists one day, and into an operation of
the PnvChip to handle the differences in the interrupt controller
in use by the machine. 

So no big difference, but the cpu machine_data won't be populated
from the core but from the machine. I hope this is compatible
with the next changes.

Thanks,

C.

> 
> Signed-off-by: David Gibson 
> ---
>  hw/intc/xics.c  |  5 +++--
>  hw/intc/xics_spapr.c| 16 +++-
>  hw/ppc/pnv.c|  4 ++--
>  hw/ppc/pnv_core.c   | 11 +--
>  hw/ppc/spapr.c  |  8 
>  hw/ppc/spapr_cpu_core.c | 13 ++---
>  include/hw/ppc/pnv_core.h   |  9 +
>  include/hw/ppc/spapr_cpu_core.h | 10 ++
>  include/hw/ppc/xics.h   |  4 ++--
>  target/ppc/cpu.h|  2 +-
>  10 files changed, 61 insertions(+), 21 deletions(-)
> 
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index e73e623e3b..689ad44e5f 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -383,7 +383,8 @@ static const TypeInfo icp_info = {
>  .class_size = sizeof(ICPStateClass),
>  };
>  
> -Object *icp_create(Object *cpu, const char *type, XICSFabric *xi, Error 
> **errp)
> +ICPState *icp_create(Object *cpu, const char *type, XICSFabric *xi,
> + Error **errp)
>  {
>  Error *local_err = NULL;
>  Object *obj;
> @@ -401,7 +402,7 @@ Object *icp_create(Object *cpu, const char *type, 
> XICSFabric *xi, Error **errp)
>  obj = NULL;
>  }
>  
> -return obj;
> +return ICP(obj);
>  }
>  
>  /*
> diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
> index 2e27b92b87..01c76717cf 100644
> --- a/hw/intc/xics_spapr.c
> +++ b/hw/intc/xics_spapr.c
> @@ -31,6 +31,7 @@
>  #include "trace.h"
>  #include "qemu/timer.h"
>  #include "hw/ppc/spapr.h"
> +#include "hw/ppc/spapr_cpu_core.h"
>  #include "hw/ppc/xics.h"
>  #include "hw/ppc/fdt.h"
>  #include "qapi/visitor.h"
> @@ -43,8 +44,9 @@ static target_ulong h_cppr(PowerPCCPU *cpu, 
> sPAPRMachineState *spapr,
> target_ulong opcode, target_ulong *args)
>  {
>  target_ulong cppr = args[0];
> +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
>  
> -icp_set_cppr(ICP(cpu->intc), cppr);
> +icp_set_cppr(spapr_cpu->icp, cppr);
>  return H_SUCCESS;
>  }
>  
> @@ -65,7 +67,8 @@ static target_ulong h_ipi(PowerPCCPU *cpu, 
> sPAPRMachineState *spapr,
>  static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> target_ulong opcode, target_ulong *args)
>  {
> -uint32_t xirr = icp_accept(ICP(cpu->intc));
> +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
> +uint32_t xirr = icp_accept(spapr_cpu->icp);
>  
>  args[0] = xirr;
>  return H_SUCCESS;
> @@ -74,7 +77,8 @@ static target_ulong h_xirr(PowerPCCPU *cpu, 
> sPAPRMachineState *spapr,
>  static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>   target_ulong opcode, target_ulong *args)
>  {
> -uint32_t xirr = icp_accept(ICP(cpu->intc));
> +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
> +uint32_t xirr = icp_accept(spapr_cpu->icp);
>  
>  args[0] = xirr;
>  args[1] = cpu_get_host_ticks();
> @@ -84,9 +88,10 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, 
> sPAPRMachineState *spapr,
>  static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>target_ulong opcode, target_ulong *args)
>  {
> +sPAPRCPUSta

Re: [Qemu-devel] [PATCH] docs: add details regarding submitting stable-specific patches

2018-06-13 Thread Cornelia Huck
On Tue, 12 Jun 2018 17:19:23 -0500
Michael Roth  wrote:

> Also add a few more details regarding normal master->stable patch
> flow.
> 
> Cc: Cornelia Huck 
> Signed-off-by: Michael Roth 
> ---
>  docs/devel/stable-process.rst | 15 +++
>  1 file changed, 15 insertions(+)
> 
> diff --git a/docs/devel/stable-process.rst b/docs/devel/stable-process.rst
> index 98736a9ea4..008fb4f9b4 100644
> --- a/docs/devel/stable-process.rst
> +++ b/docs/devel/stable-process.rst
> @@ -56,6 +56,21 @@ There are various ways to get a patch into stable:
>appropriate other people (like the patch author or the relevant maintainer)
>on copy.
>  
> +Generally patches for a stable release/branch must first be included in the
> +master QEMU branch, and from there the stable branch's maintainer will
> +cherry-pick/backport the patch. However, occasionally there is a need to
> +apply a stable-specific patch, such as a fix from master that isn't easily
> +cherry-picked and has been reworked/backported specifically for a stable
> +branch, or a fix that's only necessary in the context of a particular stable
> +release. In such cases, the patch can be submitted to qemu-devel@nongnu.org
> +using Cc: qemu-sta...@nongnu.org just like a normal stable patch, but tagged
> +with a "for-stable" or a "for-" in the subject line to
> +indicate that's it's specifically for a stable branch and not master. For

s/that's/that/

> +instance:
> +
> +  [PATCH for-2.11.2] spapr: make pseries-2.11 the default machine type
> +
> +
>  Stable release process
>  --
>  

Reviewed-by: Cornelia Huck 



[Qemu-devel] [PATCH 0/9] KVM/ARM: virt-3.0: Multiple redistributor regions and 256MB ECAM region

2018-06-13 Thread Eric Auger
This series increases the number of vcpus usable in accelerated mode
along with GICv3 and allows up to 256 PCIe busses.

It is a combination of 2 series:
[1] [RFC v3 0/8] KVM/ARM: Relax the max 123 vcpus limitation along
with KVM GICv3
[2] [PATCH 0/2] ARM virt: Support up to 256 PCIe buses

Both add features to the 3.0 virt machine.

VCPU changes:
-

Currently the max number of VCPUs usable along with the KVM GICv3
device is limited to 123. The rationale is a single redistributor
region was supported and this latter was set to [0x80A0000, 0x9000000]
within the guest physical address space, surrounded with DIST and UART
MMIO regions.

The 4.18 host kernel now allows registering several redistributor regions.
So this series overcomes the max 123 vcpu limitation by registering
a new redistributor region located just after the VIRT_MEM RAM region.
This second redistributor region has a capacity of 512 redistributors.

The max supported VCPUs in non accelerated mode is not modified.

PCIe BUS changes:
-

Current Machvirt PCI host controller's ECAM region is 16MB large.
This limits the number of PCIe buses to 16.

PC/Q35 machines have a 256MB region allowing up to 256 buses.
This series tries to bridge the gap.

It declares a new ECAM region located beyond 256GB, of size 256MB
The new ECAM region is used if:
- highmem option is set (default) and,
- either FW is not loaded or we are running an aarch64 guest
- machine type >= 3.0.

aarch32 FW does not support this highmem ECAM region. For guests
without LPAE support the highmem option must be turned off.

Best Regards

Eric

This QEMU series can be found at:
Previous version:
https://github.com/eauger/qemu/tree/v2.12.0-virt3.0-v1

Eric Auger (9):
  linux-headers: Update to 4.18-rc0
  target/arm: Allow KVM device address overwriting
  hw/intc/arm_gicv3: Introduce redist-region-count array property
  hw/intc/arm_gicv3_kvm: Get prepared to handle multiple redist regions
  hw/arm/virt: GICv3 DT node with one or two redistributor regions
  hw/arm/virt-acpi-build: Advertise one or two GICR structures
  hw/arm/virt: Register two redistributor regions when necessary
  hw/arm/virt: Add a new 256MB ECAM region
  hw/arm/virt: Add virt-3.0 machine type

 hw/arm/virt-acpi-build.c   |  30 --
 hw/arm/virt.c  | 104 +
 hw/intc/arm_gic_kvm.c  |   4 +-
 hw/intc/arm_gicv3.c|  12 ++-
 hw/intc/arm_gicv3_common.c |  38 +++-
 hw/intc/arm_gicv3_its_kvm.c|   2 +-
 hw/intc/arm_gicv3_kvm.c|  40 +++-
 include/hw/arm/virt.h  |  17 
 include/hw/intc/arm_gicv3_common.h |   8 +-
 include/standard-headers/linux/pci_regs.h  |   8 ++
 include/standard-headers/linux/virtio_gpu.h|   1 +
 include/standard-headers/linux/virtio_net.h|   3 +
 .../LICENSES/exceptions/Linux-syscall-note |   2 +-
 linux-headers/LICENSES/preferred/GPL-2.0   |   6 ++
 linux-headers/asm-arm/kvm.h|   1 +
 linux-headers/asm-arm/unistd-common.h  |   1 +
 linux-headers/asm-arm64/kvm.h  |   1 +
 linux-headers/asm-generic/unistd.h |   4 +-
 linux-headers/asm-powerpc/unistd.h |   1 +
 linux-headers/asm-x86/unistd_32.h  |   2 +
 linux-headers/asm-x86/unistd_64.h  |   2 +
 linux-headers/asm-x86/unistd_x32.h |   2 +
 linux-headers/linux/kvm.h  |   5 +-
 linux-headers/linux/psp-sev.h  |  12 +++
 target/arm/kvm.c   |  10 +-
 target/arm/kvm_arm.h   |   3 +-
 26 files changed, 274 insertions(+), 45 deletions(-)

-- 
2.5.5




[Qemu-devel] [PATCH 8/9] hw/arm/virt: Add a new 256MB ECAM region

2018-06-13 Thread Eric Auger
This patch defines a new ECAM region located after the 256GB limit.

The virt machine state is augmented with a new highmem_ecam field
which guards the usage of this new ECAM region instead of the legacy
16MB one. With the highmem ECAM region, up to 256 PCIe buses can be
used.

Signed-off-by: Eric Auger 
Reviewed-by: Laszlo Ersek 

---

RFC -> PATCH:
- remove the newline at the end of acpi_dsdt_add_pci
- use vms->highmem_ecam to select the memmap id
---
 hw/arm/virt-acpi-build.c | 21 +
 hw/arm/virt.c| 12 
 include/hw/arm/virt.h|  2 ++
 3 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index eefd1d4..4409a51 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -150,16 +150,17 @@ static void acpi_dsdt_add_virtio(Aml *scope,
 }
 
 static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
-  uint32_t irq, bool use_highmem)
+  uint32_t irq, bool use_highmem, bool 
highmem_ecam)
 {
+int ecam_id = highmem_ecam ? VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM;
 Aml *method, *crs, *ifctx, *UUID, *ifctx1, *elsectx, *buf;
 int i, bus_no;
 hwaddr base_mmio = memmap[VIRT_PCIE_MMIO].base;
 hwaddr size_mmio = memmap[VIRT_PCIE_MMIO].size;
 hwaddr base_pio = memmap[VIRT_PCIE_PIO].base;
 hwaddr size_pio = memmap[VIRT_PCIE_PIO].size;
-hwaddr base_ecam = memmap[VIRT_PCIE_ECAM].base;
-hwaddr size_ecam = memmap[VIRT_PCIE_ECAM].size;
+hwaddr base_ecam = memmap[ecam_id].base;
+hwaddr size_ecam = memmap[ecam_id].size;
 int nr_pcie_buses = size_ecam / PCIE_MMCFG_SIZE_MIN;
 
 Aml *dev = aml_device("%s", "PCI0");
@@ -173,7 +174,7 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry 
*memmap,
 aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
 
 /* Declare the PCI Routing Table. */
-Aml *rt_pkg = aml_package(nr_pcie_buses * PCI_NUM_PINS);
+Aml *rt_pkg = aml_varpackage(nr_pcie_buses * PCI_NUM_PINS);
 for (bus_no = 0; bus_no < nr_pcie_buses; bus_no++) {
 for (i = 0; i < PCI_NUM_PINS; i++) {
 int gsi = (i + bus_no) % PCI_NUM_PINS;
@@ -316,7 +317,10 @@ static void acpi_dsdt_add_pci(Aml *scope, const 
MemMapEntry *memmap,
 Aml *dev_res0 = aml_device("%s", "RES0");
 aml_append(dev_res0, aml_name_decl("_HID", aml_string("PNP0C02")));
 crs = aml_resource_template();
-aml_append(crs, aml_memory32_fixed(base_ecam, size_ecam, AML_READ_WRITE));
+aml_append(crs,
+aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED,
+ AML_NON_CACHEABLE, AML_READ_WRITE, 0x, base_ecam,
+ base_ecam + size_ecam - 1, 0x, size_ecam));
 aml_append(dev_res0, aml_name_decl("_CRS", crs));
 aml_append(dev, dev_res0);
 aml_append(scope, dev);
@@ -573,16 +577,17 @@ build_mcfg(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 {
 AcpiTableMcfg *mcfg;
 const MemMapEntry *memmap = vms->memmap;
+int ecam_id = vms->highmem_ecam ? VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM;
 int len = sizeof(*mcfg) + sizeof(mcfg->allocation[0]);
 int mcfg_start = table_data->len;
 
 mcfg = acpi_data_push(table_data, len);
-mcfg->allocation[0].address = cpu_to_le64(memmap[VIRT_PCIE_ECAM].base);
+mcfg->allocation[0].address = cpu_to_le64(memmap[ecam_id].base);
 
 /* Only a single allocation so no need to play with segments */
 mcfg->allocation[0].pci_segment = cpu_to_le16(0);
 mcfg->allocation[0].start_bus_number = 0;
-mcfg->allocation[0].end_bus_number = (memmap[VIRT_PCIE_ECAM].size
+mcfg->allocation[0].end_bus_number = (memmap[ecam_id].size
   / PCIE_MMCFG_SIZE_MIN) - 1;
 
 build_header(linker, table_data, (void *)(table_data->data + mcfg_start),
@@ -766,7 +771,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 acpi_dsdt_add_virtio(scope, &memmap[VIRT_MMIO],
 (irqmap[VIRT_MMIO] + ARM_SPI_BASE), NUM_VIRTIO_TRANSPORTS);
 acpi_dsdt_add_pci(scope, memmap, (irqmap[VIRT_PCIE] + ARM_SPI_BASE),
-  vms->highmem);
+  vms->highmem, vms->highmem_ecam);
 acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO],
(irqmap[VIRT_GPIO] + ARM_SPI_BASE));
 acpi_dsdt_add_power_button(scope);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 2a1c0fb..22b9bd1 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -150,6 +150,7 @@ static const MemMapEntry a15memmap[] = {
 [VIRT_MEM] ={ 0x4000, RAMLIMIT_BYTES },
 /* Additional 64 MB redist region (can contain up to 512 redistributors) */
 [VIRT_GIC_REDIST2] ={ 0x40ULL, 0x400ULL },
+[VIRT_PCIE_ECAM_HIGH] = { 0x401000ULL, 0x1000 },
 /* Second PCIe window, 512GB wide at the 512GB boundary */
 [VIRT_

[Qemu-devel] [PATCH 4/9] hw/intc/arm_gicv3_kvm: Get prepared to handle multiple redist regions

2018-06-13 Thread Eric Auger
Let's check if KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION is supported.
If not, we check that the number of redist regions is equal to 1 and use
the legacy KVM_VGIC_V3_ADDR_TYPE_REDIST attribute. Otherwise we use
the new attribute and allow registering multiple regions with the
KVM device.

Signed-off-by: Eric Auger 
Reviewed-by: Peter Maydell 

---

v2 -> v3:
- In kvm_arm_gicv3_realize rename the val local variable to addr_ormask and
  add a comment
- start the redist region registration  from s->nb_redist_regions - 1
  downwards
---
 hw/intc/arm_gicv3_kvm.c | 33 ++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index ed7b719..52e6e70 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -753,6 +753,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error 
**errp)
 {
 GICv3State *s = KVM_ARM_GICV3(dev);
 KVMARMGICv3Class *kgc = KVM_ARM_GICV3_GET_CLASS(s);
+bool multiple_redist_region_allowed;
 Error *local_err = NULL;
 int i;
 
@@ -789,6 +790,18 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
+multiple_redist_region_allowed =
+kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+  KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION);
+
+if (!multiple_redist_region_allowed && s->nb_redist_regions > 1) {
+error_setg(errp, "Multiple VGICv3 redistributor regions are not "
+   "supported by this host kernel");
+error_append_hint(errp, "A maximum of %d VCPUs can be used",
+  s->redist_region_count[0]);
+return;
+}
+
 kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
   0, &s->num_irq, true, &error_abort);
 
@@ -798,9 +811,23 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error 
**errp)
 
 kvm_arm_register_device(&s->iomem_dist, -1, KVM_DEV_ARM_VGIC_GRP_ADDR,
 KVM_VGIC_V3_ADDR_TYPE_DIST, s->dev_fd, 0);
-kvm_arm_register_device(&s->iomem_redist[0], -1,
-KVM_DEV_ARM_VGIC_GRP_ADDR,
-KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd, 0);
+
+if (!multiple_redist_region_allowed) {
+kvm_arm_register_device(&s->iomem_redist[0], -1,
+KVM_DEV_ARM_VGIC_GRP_ADDR,
+KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd, 0);
+} else {
+for (i = s->nb_redist_regions - 1; i >= 0; i--) {
+/* Address mask made of the rdist region index and count */
+uint64_t addr_ormask =
+i | ((uint64_t)s->redist_region_count[i] << 52);
+
+kvm_arm_register_device(&s->iomem_redist[i], -1,
+KVM_DEV_ARM_VGIC_GRP_ADDR,
+KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION,
+s->dev_fd, addr_ormask);
+}
+}
 
 if (kvm_has_gsi_routing()) {
 /* set up irq routing */
-- 
2.5.5




[Qemu-devel] [PATCH 2/9] target/arm: Allow KVM device address overwriting

2018-06-13 Thread Eric Auger
For the KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attribute, the attribute
data pointed to by kvm_device_attr.addr is an OR of the
redistributor region address and other fields such as the index
of the redistributor region and the number of redistributors the
region can contain.

The existing machine init done notifier framework sets the address
field to the actual address of the device and does not allow ORing
this value with other fields.

This patch extends the KVMDevice struct with a new kda_addr_ormask
member. Its value is passed at registration time and OR'ed with the
resolved address on kvm_arm_set_device_addr().

Signed-off-by: Eric Auger 
Reviewed-by: Peter Maydell 

---

v2 -> v3:
- s/addr_fixup/addr_ormask
- Added Peter's R-b
---
 hw/intc/arm_gic_kvm.c   |  4 ++--
 hw/intc/arm_gicv3_its_kvm.c |  2 +-
 hw/intc/arm_gicv3_kvm.c |  4 ++--
 target/arm/kvm.c| 10 +-
 target/arm/kvm_arm.h|  3 ++-
 5 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index 204369d..8666508 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -558,7 +558,7 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error 
**errp)
 | KVM_VGIC_V2_ADDR_TYPE_DIST,
 KVM_DEV_ARM_VGIC_GRP_ADDR,
 KVM_VGIC_V2_ADDR_TYPE_DIST,
-s->dev_fd);
+s->dev_fd, 0);
 /* CPU interface for current core. Unlike arm_gic, we don't
  * provide the "interface for core #N" memory regions, because
  * cores with a VGIC don't have those.
@@ -568,7 +568,7 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error 
**errp)
 | KVM_VGIC_V2_ADDR_TYPE_CPU,
 KVM_DEV_ARM_VGIC_GRP_ADDR,
 KVM_VGIC_V2_ADDR_TYPE_CPU,
-s->dev_fd);
+s->dev_fd, 0);
 
 if (kvm_has_gsi_routing()) {
 /* set up irq routing */
diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c
index eea6a73..271ebe4 100644
--- a/hw/intc/arm_gicv3_its_kvm.c
+++ b/hw/intc/arm_gicv3_its_kvm.c
@@ -103,7 +103,7 @@ static void kvm_arm_its_realize(DeviceState *dev, Error 
**errp)
 
 /* register the base address */
 kvm_arm_register_device(&s->iomem_its_cntrl, -1, KVM_DEV_ARM_VGIC_GRP_ADDR,
-KVM_VGIC_ITS_ADDR_TYPE, s->dev_fd);
+KVM_VGIC_ITS_ADDR_TYPE, s->dev_fd, 0);
 
 gicv3_its_init_mmio(s, NULL);
 
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 5649cac..46d9afb 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -793,9 +793,9 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error 
**errp)
   KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true, &error_abort);
 
 kvm_arm_register_device(&s->iomem_dist, -1, KVM_DEV_ARM_VGIC_GRP_ADDR,
-KVM_VGIC_V3_ADDR_TYPE_DIST, s->dev_fd);
+KVM_VGIC_V3_ADDR_TYPE_DIST, s->dev_fd, 0);
 kvm_arm_register_device(&s->iomem_redist, -1, KVM_DEV_ARM_VGIC_GRP_ADDR,
-KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd);
+KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd, 0);
 
 if (kvm_has_gsi_routing()) {
 /* set up irq routing */
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 98f5006..867fef8 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -184,10 +184,15 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu)
  * We use a MemoryListener to track mapping and unmapping of
  * the regions during board creation, so the board models don't
  * need to do anything special for the KVM case.
+ *
+ * Sometimes the address must be OR'ed with some other fields
+ * (for example for KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION).
+ * @kda_addr_ormask aims at storing the value of those fields.
  */
 typedef struct KVMDevice {
 struct kvm_arm_device_addr kda;
 struct kvm_device_attr kdattr;
+uint64_t kda_addr_ormask;
 MemoryRegion *mr;
 QSLIST_ENTRY(KVMDevice) entries;
 int dev_fd;
@@ -234,6 +239,8 @@ static void kvm_arm_set_device_addr(KVMDevice *kd)
  */
 if (kd->dev_fd >= 0) {
 uint64_t addr = kd->kda.addr;
+
+addr |= kd->kda_addr_ormask;
 attr->addr = (uintptr_t)&addr;
 ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
 } else {
@@ -266,7 +273,7 @@ static Notifier notify = {
 };
 
 void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
- uint64_t attr, int dev_fd)
+ uint64_t attr, int dev_fd, uint64_t addr_ormask)
 {
 KVMDevice *kd;
 
@@ -286,6 +293,7 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t 
devid, uint64_t group,
 kd->kdattr.group = group;
 kd->kdattr.attr = attr;
 kd->dev_fd = 

[Qemu-devel] [PATCH 1/9] linux-headers: Update to 4.18-rc0

2018-06-13 Thread Eric Auger
commit b357bf6023a948cf6a9472f07a1b0caac0e4f8e8
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Signed-off-by: Eric Auger 
---
 include/standard-headers/linux/pci_regs.h|  8 
 include/standard-headers/linux/virtio_gpu.h  |  1 +
 include/standard-headers/linux/virtio_net.h  |  3 +++
 linux-headers/LICENSES/exceptions/Linux-syscall-note |  2 +-
 linux-headers/LICENSES/preferred/GPL-2.0 |  6 ++
 linux-headers/asm-arm/kvm.h  |  1 +
 linux-headers/asm-arm/unistd-common.h|  1 +
 linux-headers/asm-arm64/kvm.h|  1 +
 linux-headers/asm-generic/unistd.h   |  4 +++-
 linux-headers/asm-powerpc/unistd.h   |  1 +
 linux-headers/asm-x86/unistd_32.h|  2 ++
 linux-headers/asm-x86/unistd_64.h|  2 ++
 linux-headers/asm-x86/unistd_x32.h   |  2 ++
 linux-headers/linux/kvm.h|  5 +++--
 linux-headers/linux/psp-sev.h| 12 
 15 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/include/standard-headers/linux/pci_regs.h 
b/include/standard-headers/linux/pci_regs.h
index 103ba79..4da87e2 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -506,6 +506,8 @@
 #define  PCI_EXP_DEVCTL_READRQ_256B  0x1000 /* 256 Bytes */
 #define  PCI_EXP_DEVCTL_READRQ_512B  0x2000 /* 512 Bytes */
 #define  PCI_EXP_DEVCTL_READRQ_1024B 0x3000 /* 1024 Bytes */
+#define  PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */
+#define  PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */
 #define  PCI_EXP_DEVCTL_BCR_FLR 0x8000  /* Bridge Configuration Retry / FLR */
 #define PCI_EXP_DEVSTA 10  /* Device Status */
 #define  PCI_EXP_DEVSTA_CED0x0001  /* Correctable Error Detected */
@@ -655,6 +657,11 @@
 #define  PCI_EXP_LNKCAP2_SLS_16_0GB0x0010 /* Supported Speed 16GT/s */
 #define  PCI_EXP_LNKCAP2_CROSSLINK 0x0100 /* Crosslink supported */
 #define PCI_EXP_LNKCTL248  /* Link Control 2 */
+#define PCI_EXP_LNKCTL2_TLS0x000f
+#define PCI_EXP_LNKCTL2_TLS_2_5GT  0x0001 /* Supported Speed 2.5GT/s */
+#define PCI_EXP_LNKCTL2_TLS_5_0GT  0x0002 /* Supported Speed 5GT/s */
+#define PCI_EXP_LNKCTL2_TLS_8_0GT  0x0003 /* Supported Speed 8GT/s */
+#define PCI_EXP_LNKCTL2_TLS_16_0GT 0x0004 /* Supported Speed 16GT/s */
 #define PCI_EXP_LNKSTA250  /* Link Status 2 */
 #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52  /* v2 endpoints with link end 
here */
 #define PCI_EXP_SLTCAP252  /* Slot Capabilities 2 */
@@ -981,6 +988,7 @@
 #define  PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000  /* ERR_COR signal on DL_Active 
supported */
 
 #define PCI_EXP_DPC_CTL6   /* DPC control */
+#define  PCI_EXP_DPC_CTL_EN_FATAL  0x0001  /* Enable trigger on ERR_FATAL 
message */
 #define  PCI_EXP_DPC_CTL_EN_NONFATAL   0x0002  /* Enable trigger on 
ERR_NONFATAL message */
 #define  PCI_EXP_DPC_CTL_INT_EN0x0008  /* DPC Interrupt Enable */
 
diff --git a/include/standard-headers/linux/virtio_gpu.h 
b/include/standard-headers/linux/virtio_gpu.h
index c1c8f07..52a830d 100644
--- a/include/standard-headers/linux/virtio_gpu.h
+++ b/include/standard-headers/linux/virtio_gpu.h
@@ -260,6 +260,7 @@ struct virtio_gpu_cmd_submit {
 };
 
 #define VIRTIO_GPU_CAPSET_VIRGL 1
+#define VIRTIO_GPU_CAPSET_VIRGL2 2
 
 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */
 struct virtio_gpu_get_capset_info {
diff --git a/include/standard-headers/linux/virtio_net.h 
b/include/standard-headers/linux/virtio_net.h
index e9f255e..260c368 100644
--- a/include/standard-headers/linux/virtio_net.h
+++ b/include/standard-headers/linux/virtio_net.h
@@ -57,6 +57,9 @@
 * Steering */
 #define VIRTIO_NET_F_CTRL_MAC_ADDR 23  /* Set MAC address */
 
+#define VIRTIO_NET_F_STANDBY 62/* Act as standby for another device
+* with the same MAC.
+*/
 #define VIRTIO_NET_F_SPEED_DUPLEX 63   /* Device set linkspeed and duplex */
 
 #ifndef VIRTIO_NET_NO_LEGACY
diff --git a/linux-headers/LICENSES/exceptions/Linux-syscall-note 
b/linux-headers/LICENSES/exceptions/Linux-syscall-note
index 6b60b61..9abdad7 100644
--- a/linux-headers/LICENSES/exceptions/Linux-syscall-note
+++ b/linux-headers/LICENSES/exceptions/Linux-syscall-note
@@ -1,6 +1,6 @@
 SPDX-Exception-Identifier: Linux-syscall-note
 SPDX-URL: https://spdx.org/licenses/Linux-syscall-note.html
-SPDX-Licenses: GPL-2.0, GPL-2.0+, GPL-1.0+, LGPL-2.0, LGPL-2.0+, LGPL-2.1, 
LGPL-2.1+
+SPDX-Licenses: GPL-2.0, GPL-2.0+, GPL-1.0+, LGPL-2.0, LGPL-2.0+, LGPL-2.1, 
LGPL-2.1+, GPL-2.0-only, GPL-2.0-or-later
 Usage-Guide:
   This exception is used together with one of the above SPDX-Licenses
   to mark user 

[Qemu-devel] [PATCH 6/9] hw/arm/virt-acpi-build: Advertise one or two GICR structures

2018-06-13 Thread Eric Auger
Depending on the number of smp_cpus we now register one or two
GICR structures.

Signed-off-by: Eric Auger 
---
 hw/arm/virt-acpi-build.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 74f5744..eefd1d4 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -670,6 +670,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 
 if (vms->gic_version == 3) {
 AcpiMadtGenericTranslator *gic_its;
+int nb_redist_regions = virt_gicv3_redist_region_count(vms);
 AcpiMadtGenericRedistributor *gicr = acpi_data_push(table_data,
  sizeof *gicr);
 
@@ -678,6 +679,14 @@ build_madt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 gicr->base_address = cpu_to_le64(memmap[VIRT_GIC_REDIST].base);
 gicr->range_length = cpu_to_le32(memmap[VIRT_GIC_REDIST].size);
 
+if (nb_redist_regions == 2) {
+gicr = acpi_data_push(table_data, sizeof(*gicr));
+gicr->type = ACPI_APIC_GENERIC_REDISTRIBUTOR;
+gicr->length = sizeof(*gicr);
+gicr->base_address = cpu_to_le64(memmap[VIRT_GIC_REDIST2].base);
+gicr->range_length = cpu_to_le32(memmap[VIRT_GIC_REDIST2].size);
+}
+
 if (its_class_name() && !vmc->no_its) {
 gic_its = acpi_data_push(table_data, sizeof *gic_its);
 gic_its->type = ACPI_APIC_GENERIC_TRANSLATOR;
-- 
2.5.5




[Qemu-devel] [PATCH 3/9] hw/intc/arm_gicv3: Introduce redist-region-count array property

2018-06-13 Thread Eric Auger
To prepare for multiple redistributor regions, we introduce
an array of uint32_t properties that stores the redistributor
count of each redistributor region.

Non accelerated VGICv3 only supports a single redistributor region.
The capacity of all redist regions is checked against the number of
vcpus.

Machvirt is updated to set those properties, ie. a single
redistributor region with count set to the number of vcpus
capped by 123.

Signed-off-by: Eric Auger 

---
v2 -> v3:
- add missing return in arm_gic_realize
- in gicv3_init_irqs_and_mmio, compute/check rdist_capacity first
- rdist region 0 size set to MIN(smp_cpus, redist0_capacity)
- add GICV3_REDIST_SIZE
---
 hw/arm/virt.c  | 11 ++-
 hw/intc/arm_gicv3.c| 12 +++-
 hw/intc/arm_gicv3_common.c | 38 +-
 hw/intc/arm_gicv3_kvm.c|  9 +++--
 include/hw/intc/arm_gicv3_common.h |  8 ++--
 5 files changed, 67 insertions(+), 11 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f0a4fa0..2885d18 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -522,6 +522,15 @@ static void create_gic(VirtMachineState *vms, qemu_irq 
*pic)
 if (!kvm_irqchip_in_kernel()) {
 qdev_prop_set_bit(gicdev, "has-security-extensions", vms->secure);
 }
+
+if (type == 3) {
+uint32_t redist0_capacity =
+vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE;
+uint32_t redist0_count = MIN(smp_cpus, redist0_capacity);
+
+qdev_prop_set_uint32(gicdev, "len-redist-region-count", 1);
+qdev_prop_set_uint32(gicdev, "redist-region-count[0]", redist0_count);
+}
 qdev_init_nofail(gicdev);
 gicbusdev = SYS_BUS_DEVICE(gicdev);
 sysbus_mmio_map(gicbusdev, 0, vms->memmap[VIRT_GIC_DIST].base);
@@ -1321,7 +1330,7 @@ static void machvirt_init(MachineState *machine)
  * many redistributors we can fit into the memory map.
  */
 if (vms->gic_version == 3) {
-virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / 0x2;
+virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE;
 } else {
 virt_max_cpus = GIC_NCPU;
 }
diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c
index 479c667..7044133 100644
--- a/hw/intc/arm_gicv3.c
+++ b/hw/intc/arm_gicv3.c
@@ -373,7 +373,17 @@ static void arm_gic_realize(DeviceState *dev, Error **errp)
 return;
 }
 
-gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops);
+if (s->nb_redist_regions != 1) {
+error_setg(errp, "VGICv3 redist region number(%d) not equal to 1",
+   s->nb_redist_regions);
+return;
+}
+
+gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
 
 gicv3_init_cpuif(s);
 }
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 864b7c6..ff326b3 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -247,11 +247,22 @@ static const VMStateDescription vmstate_gicv3 = {
 };
 
 void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
-  const MemoryRegionOps *ops)
+  const MemoryRegionOps *ops, Error **errp)
 {
 SysBusDevice *sbd = SYS_BUS_DEVICE(s);
+int rdist_capacity = 0;
 int i;
 
+for (i = 0; i < s->nb_redist_regions; i++) {
+rdist_capacity += s->redist_region_count[i];
+}
+if (rdist_capacity < s->num_cpu) {
+error_setg(errp, "Capacity of the redist regions(%d) "
+   "is less than number of vcpus(%d)",
+   rdist_capacity, s->num_cpu);
+return;
+}
+
 /* For the GIC, also expose incoming GPIO lines for PPIs for each CPU.
  * GPIO array layout is thus:
  *  [0..N-1] spi
@@ -277,11 +288,18 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, 
qemu_irq_handler handler,
 
 memory_region_init_io(&s->iomem_dist, OBJECT(s), ops, s,
   "gicv3_dist", 0x1);
-memory_region_init_io(&s->iomem_redist, OBJECT(s), ops ? &ops[1] : NULL, s,
-  "gicv3_redist", 0x2 * s->num_cpu);
-
 sysbus_init_mmio(sbd, &s->iomem_dist);
-sysbus_init_mmio(sbd, &s->iomem_redist);
+
+s->iomem_redist = g_new0(MemoryRegion, s->nb_redist_regions);
+for (i = 0; i < s->nb_redist_regions; i++) {
+char *name = g_strdup_printf("gicv3_redist_region[%d]", i);
+
+memory_region_init_io(&s->iomem_redist[i], OBJECT(s),
+  ops ? &ops[1] : NULL, s, name,
+  s->redist_region_count[i] * GICV3_REDIST_SIZE);
+sysbus_init_mmio(sbd, &s->iomem_redist[i]);
+g_free(name);
+}
 }
 
 static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
@@ -363,6 +381,13 @@ static void arm_gicv3_common_realize(DeviceState *dev, 
Error **errp)

[Qemu-devel] [PATCH 5/9] hw/arm/virt: GICv3 DT node with one or two redistributor regions

2018-06-13 Thread Eric Auger
This patch allows the creation of a GICv3 node with 1 or 2
redistributor regions depending on the number of smp_cpus.
The second redistributor region is located just after the
existing RAM region, at 256GB, and contains up to 512 vcpus.

Please refer to kernel documentation for further node details:
Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt

Signed-off-by: Eric Auger 

---

v2 -> v3:
- VIRT_GIC_REDIST2 is now 64MB large, ie. 512 redistributor capacity
- virt_gicv3_redist_region_count does not test kvm_irqchip_in_kernel
  anymore
---
 hw/arm/virt.c | 29 -
 include/hw/arm/virt.h | 14 ++
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 2885d18..5c02cc5 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -148,6 +148,8 @@ static const MemMapEntry a15memmap[] = {
 [VIRT_PCIE_PIO] =   { 0x3eff, 0x0001 },
 [VIRT_PCIE_ECAM] =  { 0x3f00, 0x0100 },
 [VIRT_MEM] ={ 0x4000, RAMLIMIT_BYTES },
+/* Additional 64 MB redist region (can contain up to 512 redistributors) */
+[VIRT_GIC_REDIST2] ={ 0x40ULL, 0x400ULL },
 /* Second PCIe window, 512GB wide at the 512GB boundary */
 [VIRT_PCIE_MMIO_HIGH] =   { 0x80ULL, 0x80ULL },
 };
@@ -401,13 +403,30 @@ static void fdt_add_gic_node(VirtMachineState *vms)
 qemu_fdt_setprop_cell(vms->fdt, "/intc", "#size-cells", 0x2);
 qemu_fdt_setprop(vms->fdt, "/intc", "ranges", NULL, 0);
 if (vms->gic_version == 3) {
+int nb_redist_regions = virt_gicv3_redist_region_count(vms);
+
 qemu_fdt_setprop_string(vms->fdt, "/intc", "compatible",
 "arm,gic-v3");
-qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg",
- 2, vms->memmap[VIRT_GIC_DIST].base,
- 2, vms->memmap[VIRT_GIC_DIST].size,
- 2, vms->memmap[VIRT_GIC_REDIST].base,
- 2, vms->memmap[VIRT_GIC_REDIST].size);
+
+qemu_fdt_setprop_cell(vms->fdt, "/intc",
+  "#redistributor-regions", nb_redist_regions);
+
+if (nb_redist_regions == 1) {
+qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg",
+ 2, vms->memmap[VIRT_GIC_DIST].base,
+ 2, vms->memmap[VIRT_GIC_DIST].size,
+ 2, vms->memmap[VIRT_GIC_REDIST].base,
+ 2, vms->memmap[VIRT_GIC_REDIST].size);
+} else {
+qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg",
+ 2, vms->memmap[VIRT_GIC_DIST].base,
+ 2, vms->memmap[VIRT_GIC_DIST].size,
+ 2, vms->memmap[VIRT_GIC_REDIST].base,
+ 2, vms->memmap[VIRT_GIC_REDIST].size,
+ 2, vms->memmap[VIRT_GIC_REDIST2].base,
+ 2, 
vms->memmap[VIRT_GIC_REDIST2].size);
+}
+
 if (vms->virt) {
 qemu_fdt_setprop_cells(vms->fdt, "/intc", "interrupts",
GIC_FDT_IRQ_TYPE_PPI, ARCH_GICV3_MAINT_IRQ,
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 4ac7ef6..308156f 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -35,6 +35,8 @@
 #include "qemu/notify.h"
 #include "hw/boards.h"
 #include "hw/arm/arm.h"
+#include "sysemu/kvm.h"
+#include "hw/intc/arm_gicv3_common.h"
 
 #define NUM_GICV2M_SPIS   64
 #define NUM_VIRTIO_TRANSPORTS 32
@@ -60,6 +62,7 @@ enum {
 VIRT_GIC_V2M,
 VIRT_GIC_ITS,
 VIRT_GIC_REDIST,
+VIRT_GIC_REDIST2,
 VIRT_SMMU,
 VIRT_UART,
 VIRT_MMIO,
@@ -130,4 +133,15 @@ typedef struct {
 
 void virt_acpi_setup(VirtMachineState *vms);
 
+/* Return the number of used redistributor regions  */
+static inline int virt_gicv3_redist_region_count(VirtMachineState *vms)
+{
+uint32_t redist0_capacity =
+vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE;
+
+assert(vms->gic_version == 3);
+
+return vms->smp_cpus > redist0_capacity ? 2 : 1;
+}
+
 #endif /* QEMU_ARM_VIRT_H */
-- 
2.5.5




Re: [Qemu-devel] [RFC untested PATCH] i386/cpu: make -cpu host support monitor/mwait

2018-06-13 Thread Igor Mammedov
On Tue, 12 Jun 2018 11:49:22 -0300
Eduardo Habkost  wrote:

> On Tue, Jun 12, 2018 at 03:58:03PM +0200, Igor Mammedov wrote:
> [...]
> > > > > +if (xcc->host_cpuid_required && enable_cpu_pm) {
> > > > > +host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx,
> > > > > +   &cpu->mwait.ecx, &cpu->mwait.edx);
> > > > > +}
[...]
> > > > also max_x86_cpu_initfn() might be better place for filling it up.  
> 
> Why?
I've missed 'enable_cpu_pm' which is probably a property,
so yep it can't go into initfn.

However, if we are not going to migrate this state or use it outside of
cpu_x86_cpuid(), I don't see why we should add it to X86CPU state
and keep it around.
We can query it on demand from cpu_x86_cpuid() like we do for
PMU leaf.




[Qemu-devel] [PATCH 7/9] hw/arm/virt: Register two redistributor regions when necessary

2018-06-13 Thread Eric Auger
With a VGICv3 KVM device, if the number of vcpus exceeds the
capacity of the legacy redistributor region (123 redistributors),
we now attempt to register a second redistributor region. Up to
512 redistributors can fit in the latter, on top of the 123 allowed
by the legacy redistributor region.

Registering this second redistributor region is possible if the
host kernel supports the following VGICv3 KVM device group/attribute:
KVM_DEV_ARM_VGIC_GRP_ADDR/KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION.

In case the host kernel does not support the registration of several
redistributor regions and the requested number of vcpus exceeds the
capacity of the legacy redistributor region, the GICv3 device
initialization fails with a proper error message and qemu exits.

At the moment the max number of vcpus still is capped by the
virt machine class max_cpus.

Signed-off-by: Eric Auger 

---

v2 -> v3:
- remove spare space
---
 hw/arm/virt.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5c02cc5..2a1c0fb 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -528,6 +528,7 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic)
 SysBusDevice *gicbusdev;
 const char *gictype;
 int type = vms->gic_version, i;
+uint32_t nb_redist_regions = 0;
 
 gictype = (type == 3) ? gicv3_class_name() : gic_class_name();
 
@@ -547,14 +548,28 @@ static void create_gic(VirtMachineState *vms, qemu_irq 
*pic)
 vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE;
 uint32_t redist0_count = MIN(smp_cpus, redist0_capacity);
 
-qdev_prop_set_uint32(gicdev, "len-redist-region-count", 1);
+nb_redist_regions = virt_gicv3_redist_region_count(vms);
+
+qdev_prop_set_uint32(gicdev, "len-redist-region-count",
+ nb_redist_regions);
 qdev_prop_set_uint32(gicdev, "redist-region-count[0]", redist0_count);
+
+if (nb_redist_regions == 2) {
+uint32_t redist1_capacity =
+vms->memmap[VIRT_GIC_REDIST2].size / GICV3_REDIST_SIZE;
+
+qdev_prop_set_uint32(gicdev, "redist-region-count[1]",
+MIN(smp_cpus - redist0_count, redist1_capacity));
+}
 }
 qdev_init_nofail(gicdev);
 gicbusdev = SYS_BUS_DEVICE(gicdev);
 sysbus_mmio_map(gicbusdev, 0, vms->memmap[VIRT_GIC_DIST].base);
 if (type == 3) {
 sysbus_mmio_map(gicbusdev, 1, vms->memmap[VIRT_GIC_REDIST].base);
+if (nb_redist_regions == 2) {
+sysbus_mmio_map(gicbusdev, 2, vms->memmap[VIRT_GIC_REDIST2].base);
+}
 } else {
 sysbus_mmio_map(gicbusdev, 1, vms->memmap[VIRT_GIC_CPU].base);
 }
@@ -1350,6 +1365,7 @@ static void machvirt_init(MachineState *machine)
  */
 if (vms->gic_version == 3) {
 virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE;
+virt_max_cpus += vms->memmap[VIRT_GIC_REDIST2].size / 
GICV3_REDIST_SIZE;
 } else {
 virt_max_cpus = GIC_NCPU;
 }
-- 
2.5.5




Re: [Qemu-devel] [Qemu-ppc] [PATCH v2 5/8] hw/timer: Add basic M41T80 emulation

2018-06-13 Thread BALATON Zoltan

On Wed, 13 Jun 2018, David Gibson wrote:

On Wed, Jun 06, 2018 at 07:35:28PM +0200, BALATON Zoltan wrote:

On Wed, 6 Jun 2018, Philippe Mathieu-Daudé wrote:

On 06/06/2018 10:31 AM, BALATON Zoltan wrote:

Basic emulation of the M41T80 serial (I2C) RTC chip. Only getting time
of day is implemented. Setting time and RTC alarm are not supported.

[...]

diff --git a/hw/timer/m41t80.c b/hw/timer/m41t80.c
new file mode 100644
index 000..9dbdb1b
--- /dev/null
+++ b/hw/timer/m41t80.c
@@ -0,0 +1,117 @@
+/*
+ * M41T80 serial rtc emulation
+ *
+ * Copyright (c) 2018 BALATON Zoltan
+ *
+ * This work is licensed under the GNU GPL license version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/timer.h"
+#include "qemu/bcd.h"
+#include "hw/i2c/i2c.h"
+
+#define TYPE_M41T80 "m41t80"
+#define M41T80(obj) OBJECT_CHECK(M41t80State, (obj), TYPE_M41T80)
+
+typedef struct M41t80State {
+I2CSlave parent_obj;
+int8_t addr;
+} M41t80State;
+
+static void m41t80_realize(DeviceState *dev, Error **errp)
+{
+M41t80State *s = M41T80(dev);
+
+s->addr = -1;
+}
+
+static int m41t80_send(I2CSlave *i2c, uint8_t data)
+{
+M41t80State *s = M41T80(i2c);
+
+if (s->addr < 0) {
+s->addr = data;
+} else {
+s->addr++;
+}


What about adding enum i2c_event in M41t80State and use the enum here
rather than the addr < 0? Also this wrap at INT8_MAX = 127, is this
expected?


Thanks for the review. I guess we could add enum for device bytes and the
special case -1 meaning no register address selected yet but this is a very
simple device with only 20 bytes and the datasheet also lists them by number
without naming them so I think we can also refer to them by number. Since
the device has only these 20 bytes the case with 127 should also not be a
problem as that's an invalid address anyway. Or did you mean something else?


So, I'm not particularly in favour of adding extra state variables.

But is using addr < 0 safe here?  You're assigning the uint8_t data to
addr - could that result in a negative value?


Why wouldn't it be safe with the expected values for register address 
between 0-19? If the guest sends garbage values over 127 it will either 
result in invalid register or unselected register and lead to an error 
when trying to read/write that register so I don't see what other problem 
this may cause.


The addr < 0 check tests whether no address has been selected yet (on creating
the device and when sending the first value from the host, addr is set to -1). In
this case the first write sets the register address, then subsequent
reads/writes increment the register address, as the datasheet says.


Regards,
BALATON Zoltan


Re: [Qemu-devel] [PATCH v2 2/8] ppc4xx_i2c: Move register state to private struct and remove unimplemented sdata and intr registers

2018-06-13 Thread BALATON Zoltan

On Wed, 13 Jun 2018, David Gibson wrote:

On Fri, Jun 08, 2018 at 11:20:50AM +0200, BALATON Zoltan wrote:

On Fri, 8 Jun 2018, David Gibson wrote:

On Wed, Jun 06, 2018 at 03:31:48PM +0200, BALATON Zoltan wrote:

Signed-off-by: BALATON Zoltan 


It's not clear to me why this is preferable to having the registers
embedded in the state structure.  The latter is pretty standard
practice for qemu.


Maybe it will be clearer after the next patch in the series. I needed a
place to store the bitbang_i2c_interface for the directcntl way of accessing
the i2c bus but I can't include bitbang_i2c.h from the public header because
it's a local header. So I needed a local extension to the state struct. Once
I have that then it's a good place to also store private registers which are
now defined in the same file so I don't have to look up them in a different
place. This seemed clearer to me and easier to work with. Maybe the splitting
of the rewrite did not make this clear.


Oh.. right.  There's a better way.

You can just forward declare the bitbang_i2c_interface structure like
this in your header:
typedef struct bitbang_i2c_interface bitbang_i2c_interface;

So you're declaring the existence of the structure, but not its
contents - that's sufficient to create a pointer to it.  Then you
don't need to create the substructure and extra level of indirection.


One thing I'm not sure about though:


---
 hw/i2c/ppc4xx_i2c.c | 75 +
 include/hw/i2c/ppc4xx_i2c.h | 19 ++--
 2 files changed, 43 insertions(+), 51 deletions(-)

diff --git a/hw/i2c/ppc4xx_i2c.c b/hw/i2c/ppc4xx_i2c.c
index d1936db..a68b5f7 100644
--- a/hw/i2c/ppc4xx_i2c.c
+++ b/hw/i2c/ppc4xx_i2c.c

[...]

@@ -330,7 +335,9 @@ static const MemoryRegionOps ppc4xx_i2c_ops = {
 static void ppc4xx_i2c_init(Object *o)
 {
 PPC4xxI2CState *s = PPC4xx_I2C(o);
+PPC4xxI2CRegs *r = g_malloc0(sizeof(PPC4xxI2CRegs));

+s->regs = r;
 memory_region_init_io(&s->iomem, OBJECT(s), &ppc4xx_i2c_ops, s,
   TYPE_PPC4xx_I2C, PPC4xx_I2C_MEM_SIZE);
 sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);


I allocate memory here but I'm not sure if it should be g_free'd somewhere
and if so where? I was not able to detangle QOM object hierarchies and there
seems to be no good docs available or I haven't found them. (PCI devices
seem to have unrealize methods but this did not work for I2C objects.)


Yes, if you're allocating you definitely should be free()ing.  It
should go in the corresponding cleanup routine to where it is
allocated.  Since the allocation is in instance_init(), the free()
should be in instance_finalize() (which you'd need to add).

Except that the above should let you avoid that.

..and I guess this won't actually ever be finalized in practice.

..and there doesn't seem to be a way to free up a bitbang_interface,
so even if you added the finalize, it still wouldn't really clean up
properly.


Yes, I suspected it won't matter anyway. I'll try your suggestion to just 
declare the bitbang_i2c_interface in the public header in next version.


Any more reviews to expect from you for other patches or should I send a 
v3 with the changes so far?


Thank you,
BALATON Zoltan



[Qemu-devel] [PATCH 9/9] hw/arm/virt: Add virt-3.0 machine type

2018-06-13 Thread Eric Auger
This machine type supports two new features:
- highmem 256MB ECAM (default). This feature is disabled for
  earlier machine types and if highmem is off.
- max_cpus set to 512 vcpus (255 before)

The high 256MB ECAM region is chosen instead of the legacy
16MB one if the machine type allows it, if highmem is set
(LPAE supported by the guest) and (!firmware_loaded || aarch64).
Indeed aarch32 mode FW may not support this high ECAM region.

Signed-off-by: Eric Auger 

---

PATCH: merge of ECAM and VCPU extension
- Laszlo reviewed the ECAM changes but I dropped his R-b
  due to the squash

RFC -> v1
- check firmware_loaded and aarch64 value
- do all the computation in machvirt_init
---
 hw/arm/virt.c | 36 ++--
 include/hw/arm/virt.h |  1 +
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 22b9bd1..5ed25b4 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1317,6 +1317,7 @@ static void machvirt_init(MachineState *machine)
 int n, virt_max_cpus;
 MemoryRegion *ram = g_new(MemoryRegion, 1);
 bool firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0);
+bool aarch64 = true;
 
 /* We can probe only here because during property set
  * KVM is not available yet
@@ -1432,6 +1433,8 @@ static void machvirt_init(MachineState *machine)
 numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj),
   &error_fatal);
 
+aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL);
+
 if (!vms->secure) {
 object_property_set_bool(cpuobj, false, "has_el3", NULL);
 }
@@ -1490,6 +1493,8 @@ static void machvirt_init(MachineState *machine)
 create_uart(vms, pic, VIRT_SECURE_UART, secure_sysmem, serial_hd(1));
 }
 
+vms->highmem_ecam &= vms->highmem && (!firmware_loaded || aarch64);
+
 create_rtc(vms, pic);
 
 create_pcie(vms, pic);
@@ -1700,11 +1705,13 @@ static void virt_machine_class_init(ObjectClass *oc, 
void *data)
 HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
 
 mc->init = machvirt_init;
-/* Start max_cpus at the maximum QEMU supports. We'll further restrict
- * it later in machvirt_init, where we have more information about the
- * configuration of the particular instance.
+/* Start with max_cpus set to 512. This value is chosen since achievable
+ * in accelerated mode with GICv3 and recent host supporting up to 512 
vcpus
+ * and multiple redistributor region registration.
+ * This value will be refined later on once we collect more information
+ * about the configuration of the particular instance.
  */
-mc->max_cpus = 255;
+mc->max_cpus = 512;
 machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_CALXEDA_XGMAC);
 machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
 mc->block_default_type = IF_VIRTIO;
@@ -1743,7 +1750,7 @@ type_init(machvirt_machine_init);
 #define VIRT_COMPAT_2_12 \
 HW_COMPAT_2_12
 
-static void virt_2_12_instance_init(Object *obj)
+static void virt_3_0_instance_init(Object *obj)
 {
 VirtMachineState *vms = VIRT_MACHINE(obj);
 VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
@@ -1786,6 +1793,8 @@ static void virt_2_12_instance_init(Object *obj)
 "Set GIC version. "
 "Valid values are 2, 3 and host", NULL);
 
+vms->highmem_ecam = !vmc->no_highmem_ecam;
+
 if (vmc->no_its) {
 vms->its = false;
 } else {
@@ -1811,11 +1820,26 @@ static void virt_2_12_instance_init(Object *obj)
 vms->irqmap = a15irqmap;
 }
 
+static void virt_machine_3_0_options(MachineClass *mc)
+{
+}
+DEFINE_VIRT_MACHINE_AS_LATEST(3, 0)
+
+static void virt_2_12_instance_init(Object *obj)
+{
+virt_3_0_instance_init(obj);
+}
+
 static void virt_machine_2_12_options(MachineClass *mc)
 {
+VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+
+virt_machine_3_0_options(mc);
 SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_12);
+vmc->no_highmem_ecam = true;
+mc->max_cpus = 255;
 }
-DEFINE_VIRT_MACHINE_AS_LATEST(2, 12)
+DEFINE_VIRT_MACHINE(2, 12)
 
 #define VIRT_COMPAT_2_11 \
 HW_COMPAT_2_11
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 2c18a59..8c74d4c 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -98,6 +98,7 @@ typedef struct {
 bool no_pmu;
 bool claim_edge_triggered_timers;
 bool smbios_old_sys_ver;
+bool no_highmem_ecam;
 } VirtMachineClass;
 
 typedef struct {
-- 
2.5.5




Re: [Qemu-devel] [PATCH v2 3/8] ppc4xx_i2c: Implement directcntl register

2018-06-13 Thread BALATON Zoltan

On Wed, 13 Jun 2018, David Gibson wrote:

On Wed, Jun 06, 2018 at 03:31:48PM +0200, BALATON Zoltan wrote:

Signed-off-by: BALATON Zoltan 
---
 default-configs/ppc-softmmu.mak|  1 +
 default-configs/ppcemb-softmmu.mak |  1 +
 hw/i2c/ppc4xx_i2c.c| 14 +-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak
index 4d7be45..7d0dc2f 100644
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -26,6 +26,7 @@ CONFIG_USB_EHCI_SYSBUS=y
 CONFIG_SM501=y
 CONFIG_IDE_SII3112=y
 CONFIG_I2C=y
+CONFIG_BITBANG_I2C=y

 # For Macs
 CONFIG_MAC=y
diff --git a/default-configs/ppcemb-softmmu.mak 
b/default-configs/ppcemb-softmmu.mak
index 67d18b2..37af193 100644
--- a/default-configs/ppcemb-softmmu.mak
+++ b/default-configs/ppcemb-softmmu.mak
@@ -19,3 +19,4 @@ CONFIG_USB_EHCI_SYSBUS=y
 CONFIG_SM501=y
 CONFIG_IDE_SII3112=y
 CONFIG_I2C=y
+CONFIG_BITBANG_I2C=y
diff --git a/hw/i2c/ppc4xx_i2c.c b/hw/i2c/ppc4xx_i2c.c
index a68b5f7..5806209 100644
--- a/hw/i2c/ppc4xx_i2c.c
+++ b/hw/i2c/ppc4xx_i2c.c
@@ -30,6 +30,7 @@
 #include "cpu.h"
 #include "hw/hw.h"
 #include "hw/i2c/ppc4xx_i2c.h"
+#include "bitbang_i2c.h"

 #define PPC4xx_I2C_MEM_SIZE 18

@@ -46,7 +47,13 @@

 #define IIC_XTCNTLSS_SRST   (1 << 0)

+#define IIC_DIRECTCNTL_SDAC (1 << 3)
+#define IIC_DIRECTCNTL_SCLC (1 << 2)
+#define IIC_DIRECTCNTL_MSDA (1 << 1)
+#define IIC_DIRECTCNTL_MSCL (1 << 0)
+
 typedef struct {
+bitbang_i2c_interface *bitbang;
 uint8_t mdata;
 uint8_t lmadr;
 uint8_t hmadr;
@@ -308,7 +315,11 @@ static void ppc4xx_i2c_writeb(void *opaque, hwaddr addr, 
uint64_t value,
 i2c->xtcntlss = value;
 break;
 case 16:
-i2c->directcntl = value & 0x7;
+i2c->directcntl = value & (IIC_DIRECTCNTL_SDAC & IIC_DIRECTCNTL_SCLC);
+i2c->directcntl |= (value & IIC_DIRECTCNTL_SCLC ? 1 : 0);
+bitbang_i2c_set(i2c->bitbang, BITBANG_I2C_SCL, i2c->directcntl & 1);


Shouldn't that use i2c->directcntl & IIC_DIRECTCNTL_MSCL ?


+i2c->directcntl |= bitbang_i2c_set(i2c->bitbang, BITBANG_I2C_SDA,
+   (value & IIC_DIRECTCNTL_SDAC) != 0) << 1;


Last expression might be clearer as:
value & IIC_DIRECTCNTL_SDAC ? IIC_DIRECTCNTL_MSDA : 0


I guess this is a matter of taste but to me IIC_DIRECTCNTL_MSDA is a bit 
position in the register so I use that when accessing that bit but when I 
check for the values of a bit being 0 or 1 I don't use the define which is 
for something else, just happens to have value 1 as well.


If this does not explain your question and you think it's better to use 
defines here I can change that in next version, please let me know.


Regards,
BALATON Zoltan



Re: [Qemu-devel] [PATCH] s390x/cpumodels: add z14 Model ZR1

2018-06-13 Thread David Hildenbrand
On 13.06.2018 10:18, Christian Borntraeger wrote:
> introduce the new z14 Model ZR1 cpu model. Mostly identical to z14, only
> the cpu type differs (3906 vs. 3907)
> 
> Signed-off-by: Christian Borntraeger 
> ---
>  target/s390x/cpu_models.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
> index e10035aaa8..cfdbccf46d 100644
> --- a/target/s390x/cpu_models.c
> +++ b/target/s390x/cpu_models.c
> @@ -79,6 +79,7 @@ static S390CPUDef s390_cpu_defs[] = {
>  CPUDEF_INIT(0x2964, 13, 2, 47, 0x0800U, "z13.2", "IBM z13 GA2"),
>  CPUDEF_INIT(0x2965, 13, 2, 47, 0x0800U, "z13s", "IBM z13s GA1"),
>  CPUDEF_INIT(0x3906, 14, 1, 47, 0x0800U, "z14", "IBM z14 GA1"),
> +CPUDEF_INIT(0x3907, 14, 1, 47, 0x0800U, "z14ZR1", "IBM z14 Model ZR1 
> GA1"),
>  };
>  
>  #define QEMU_MAX_CPU_TYPE 0x2827
> 

This is the first time that we have two different EC variants (different
CPU type) with the same HW gen and GA level.

Are they really completely equal (esp. the IBC value? that is used for
model detection) apart from the CPU type?

check_unavailable_features()/arch_query_cpu_model_comparison() will
treat both models as equal (meaning one can run on the other).

arch_query_cpu_model_baseline() might produce a z14ZR1-base when
baselining e.g. a z14 and a z14ZR1 (don't think this is a problem), as
s390_find_cpu_def() will always try to walk as far as possible in the
cpu model definition table.

-- 

Thanks,

David / dhildenb



Re: [Qemu-devel] [PATCH 1/7] spapr: Clean up cpu realize/unrealize paths

2018-06-13 Thread Greg Kurz
On Wed, 13 Jun 2018 10:11:45 +0200
Cédric Le Goater  wrote:

> On 06/13/2018 08:57 AM, David Gibson wrote:
> > spapr_cpu_init() and spapr_cpu_destroy() are only called from the spapr
> > cpu core realize/unrealize paths, and really can only be called from there.
> > 
> > Those are all short functions, so fold the pairs together for simplicity.
> > While we're there rename some functions and change some parameter types
> > for brevity and clarity.
> > 
> > Signed-off-by: David Gibson   
> 
> Reviewed-by: Cédric Le Goater 
> 
> Still a call to spapr_cpu_reset(cpu). We should try to get rid of it
> one day.
>  

Yeah, CPU reset should ideally be triggered during machine reset or during
hotplug. There have been several tries to do so but we hit an issue with
hotplug... If we don't call spapr_cpu_reset() here then CPUState::halted
isn't set and the CPU can start executing before the hotplug path has a
chance to reset it...

https://lists.nongnu.org/archive/html/qemu-ppc/2018-04/msg00126.html

This requires a deeper investigation.

> Thanks,
> 
> C.
> 
> > ---
> >  hw/ppc/spapr_cpu_core.c | 69 +++--
> >  1 file changed, 25 insertions(+), 44 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> > index f3e9b879b2..7fdb3b6667 100644
> > --- a/hw/ppc/spapr_cpu_core.c
> > +++ b/hw/ppc/spapr_cpu_core.c
> > @@ -83,26 +83,6 @@ void spapr_cpu_set_entry_state(PowerPCCPU *cpu, 
> > target_ulong nip, target_ulong r
> >  ppc_store_lpcr(cpu, env->spr[SPR_LPCR] | pcc->lpcr_pm);
> >  }
> >  
> > -static void spapr_cpu_destroy(PowerPCCPU *cpu)
> > -{
> > -qemu_unregister_reset(spapr_cpu_reset, cpu);
> > -}
> > -
> > -static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
> > -   Error **errp)
> > -{
> > -CPUPPCState *env = &cpu->env;
> > -
> > -/* Set time-base frequency to 512 MHz */
> > -cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
> > -
> > -cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
> > -kvmppc_set_papr(cpu);
> > -
> > -qemu_register_reset(spapr_cpu_reset, cpu);
> > -spapr_cpu_reset(cpu);
> > -}
> > -
> >  /*
> >   * Return the sPAPR CPU core type for @model which essentially is the CPU
> >   * model specified with -cpu cmdline option.
> > @@ -122,44 +102,47 @@ const char *spapr_get_cpu_core_type(const char 
> > *cpu_type)
> >  return object_class_get_name(oc);
> >  }
> >  
> > -static void spapr_cpu_core_unrealizefn(DeviceState *dev, Error **errp)
> > +static void spapr_unrealize_vcpu(PowerPCCPU *cpu)
> > +{
> > +qemu_unregister_reset(spapr_cpu_reset, cpu);
> > +object_unparent(cpu->intc);
> > +cpu_remove_sync(CPU(cpu));
> > +object_unparent(OBJECT(cpu));
> > +}
> > +
> > +static void spapr_cpu_core_unrealize(DeviceState *dev, Error **errp)
> >  {
> >  sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
> >  CPUCore *cc = CPU_CORE(dev);
> >  int i;
> >  
> >  for (i = 0; i < cc->nr_threads; i++) {
> > -Object *obj = OBJECT(sc->threads[i]);
> > -DeviceState *dev = DEVICE(obj);
> > -CPUState *cs = CPU(dev);
> > -PowerPCCPU *cpu = POWERPC_CPU(cs);
> > -
> > -spapr_cpu_destroy(cpu);
> > -object_unparent(cpu->intc);
> > -cpu_remove_sync(cs);
> > -object_unparent(obj);
> > +spapr_unrealize_vcpu(sc->threads[i]);
> >  }
> >  g_free(sc->threads);
> >  }
> >  
> > -static void spapr_cpu_core_realize_child(Object *child,
> > - sPAPRMachineState *spapr, Error 
> > **errp)
> > +static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> > +   Error **errp)
> >  {
> > +CPUPPCState *env = &cpu->env;
> >  Error *local_err = NULL;
> > -CPUState *cs = CPU(child);
> > -PowerPCCPU *cpu = POWERPC_CPU(cs);
> >  
> > -object_property_set_bool(child, true, "realized", &local_err);
> > +object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
> >  if (local_err) {
> >  goto error;
> >  }
> >  
> > -spapr_cpu_init(spapr, cpu, &local_err);
> > -if (local_err) {
> > -goto error;
> > -}
> > +/* Set time-base frequency to 512 MHz */
> > +cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
> > +
> > +cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
> > +kvmppc_set_papr(cpu);
> >  
> > -cpu->intc = icp_create(child, spapr->icp_type, XICS_FABRIC(spapr),
> > +qemu_register_reset(spapr_cpu_reset, cpu);
> > +spapr_cpu_reset(cpu);
> > +
> > +cpu->intc = icp_create(OBJECT(cpu), spapr->icp_type, 
> > XICS_FABRIC(spapr),
> > &local_err);
> >  if (local_err) {
> >  goto error;
> > @@ -220,9 +203,7 @@ static void spapr_cpu_core_realize(DeviceState *dev, 
> > Error **errp)
> >  }
> >  
> >  for (j = 0; j < cc->nr_threads; j++) {
> > -obj = OBJECT(sc->threads[j]);
> > -

Re: [Qemu-devel] [PATCH v4 1/2] qemu-error: introduce {error|warn}_report_once

2018-06-13 Thread Peter Xu
On Wed, Jun 13, 2018 at 10:01:22AM +0200, Markus Armbruster wrote:
> Peter Xu  writes:
> 
> > There are many error_report()s that can be used in frequently called
> > functions, especially on IO paths.  That can be unideal in that
> > malicious guest can try to trigger the error tons of time which might
> > use up the log space on the host (e.g., libvirt can capture the stderr
> > of QEMU and put it persistently onto disk).  In VT-d emulation code, we
> > have trace_vtd_error() tracer.  AFAIU all those places can be replaced
> > by something like error_report() but trace points are mostly used to
> > avoid the DDOS attack that mentioned above.  However using trace points
> > mean that errors are not dumped if trace not enabled.
> >
> > It's not a big deal in most modern server managements since we have
> > things like logrotate to maintain the logs and make sure the quota is
> > expected.  However it'll still be nice that we just provide another way
> > to restrict message generations.  In most cases, this kind of
> > error_report()s will only provide valid information on the first message
> > sent, and all the rest of similar messages will be mostly talking about
> > the same thing.  This patch introduces *_report_once() helpers to allow
> > a message to be dumped only once during one QEMU process's life cycle.
> > It will make sure: (1) it's on by deffault, so we can even get something
> 
> default
> 
> > without turning the trace on and reproducing, and (2) it won't be
> > affected by DDOS attack.
> >
> > To implement it, I stole the printk_once() macro from Linux.
> >
> > CC: Eric Blake 
> > CC: Markus Armbruster 
> > Signed-off-by: Peter Xu 
> > ---
> >  include/qemu/error-report.h | 32 
> >  1 file changed, 32 insertions(+)
> >
> > diff --git a/include/qemu/error-report.h b/include/qemu/error-report.h
> > index e1c8ae1a52..c7ec54cb97 100644
> > --- a/include/qemu/error-report.h
> > +++ b/include/qemu/error-report.h
> > @@ -44,6 +44,38 @@ void error_report(const char *fmt, ...) GCC_FMT_ATTR(1, 
> > 2);
> >  void warn_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
> >  void info_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
> >  
> > +/*
> > + * Similar to error_report(), but it only prints the message once.  It
> > + * returns true when it prints the first time, otherwise false.
> 
> I like to start function contracts with a single line stating the
> function's purpose, and I prefer imperative mood, like this:
> 
> * Similar to error_report(), but it only prints the message once.
> * Return true when it prints, false otherwise.
> 
> > + */
> > +#define error_report_once(fmt, ...) \
> > +({  \
> > +static bool print_once_;   \
> > +bool ret_print_once_ = !print_once_;  \
> > +\
> > +if (!print_once_) {\
> > +print_once_ = true;\
> > +error_report(fmt, ##__VA_ARGS__);   \
> > +}   \
> > +unlikely(ret_print_once_); \
> > +})
> 
> Please align the backslashes, say with emacs command c-backslash-region,
> bound to C-c C-\.

I am with evil mode so mostly I'm using evil-indent.  It's strange why
the patches were not indented correctly.  Now indent will be fine
locally if I redo the evil-indent.  I must have done something wrong
before. :(

> 
> > +
> > +/*
> > + * Similar to warn_report(), but it only prints the message once.  It
> > + * returns true when it prints the first time, otherwise false.
> > + */
> > +#define warn_report_once(fmt, ...)  \
> > +({  \
> > +static bool print_once_;   \
> > +bool ret_print_once_ = !print_once_;  \
> > +\
> > +if (!print_once_) {\
> > +print_once_ = true;\
> > +warn_report(fmt, ##__VA_ARGS__);   \
> > +}   \
> > +unlikely(ret_print_once_); \
> > +})
> 
> Likewise.
> 
> > +
> >  const char *error_get_progname(void);
> >  extern bool enable_timestamp_msg;
> 
> With these nits addressed:
> Reviewed-by: Markus Armbruster 
> 
> I can touch them up when I apply.

Thanks, Markus.

-- 
Peter Xu



Re: [Qemu-devel] [PATCH 2/7] pnv: Add missing error check during cpu realize()

2018-06-13 Thread Greg Kurz
On Wed, 13 Jun 2018 16:57:02 +1000
David Gibson  wrote:

> In pnv_core_realize() we call two functions with an Error * parameter in
> succession, which means if they both cause errors we'll lose the first one.

Not exactly. The error code doesn't allow that and QEMU will abort.

static void error_setv(Error **errp,
   const char *src, int line, const char *func,
   ErrorClass err_class, const char *fmt, va_list ap,
   const char *suffix)
{
Error *err;
int saved_errno = errno;

if (errp == NULL) {
return;
}
assert(*errp == NULL);


> Add an extra test/escape to fix this.
> 
> Signed-off-by: David Gibson 
> ---
>  hw/ppc/pnv_core.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> index 13ad7d9e04..efb68226bb 100644
> --- a/hw/ppc/pnv_core.c
> +++ b/hw/ppc/pnv_core.c
> @@ -173,6 +173,9 @@ static void pnv_core_realize(DeviceState *dev, Error 
> **errp)
>  
>  snprintf(name, sizeof(name), "thread[%d]", i);
>  object_property_add_child(OBJECT(pc), name, obj, &local_err);
> +if (local_err) {
> +goto err;
> +}
>  object_property_add_alias(obj, "core-pir", OBJECT(pc),
>"pir", &local_err);
>  if (local_err) {

Hmm... the current error path seems to assume failures to be
caused by object_property_add_child(). It hence unparents the
previously parented CPUs, but not the current one. So we'll
miss one call to object_unparent() if object_property_add_alias()
fails.



Re: [Qemu-devel] [PATCH 3/7] pnv_core: Allocate cpu thread objects individually

2018-06-13 Thread Greg Kurz
On Wed, 13 Jun 2018 16:57:03 +1000
David Gibson  wrote:

> Currently, we allocate space for all the cpu objects within a single core
> in one big block.  This was copied from an older version of the spapr code
> and requires some ugly pointer manipulation to extract the individual
> objects.
> 
> This design was due to a misunderstanding of qemu lifetime conventions and
> has already been changed in spapr (in 94ad93bd "spapr_cpu_core: instantiate
> CPUs separately").
> 
> Make an equivalent change in pnv_core to get rid of the nasty pointer
> arithmetic.
> 
> Signed-off-by: David Gibson 
> ---

Reviewed-by: Greg Kurz 

>  hw/ppc/pnv.c  |  4 ++--
>  hw/ppc/pnv_core.c | 11 +--
>  include/hw/ppc/pnv_core.h |  2 +-
>  3 files changed, 8 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index 0314881316..0b9508d94d 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -121,9 +121,9 @@ static int get_cpus_node(void *fdt)
>   */
>  static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt)
>  {
> -CPUState *cs = CPU(DEVICE(pc->threads));
> +PowerPCCPU *cpu = pc->threads[0];
> +CPUState *cs = CPU(cpu);
>  DeviceClass *dc = DEVICE_GET_CLASS(cs);
> -PowerPCCPU *cpu = POWERPC_CPU(cs);
>  int smt_threads = CPU_CORE(pc)->nr_threads;
>  CPUPPCState *env = &cpu->env;
>  PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> index efb68226bb..59309e149c 100644
> --- a/hw/ppc/pnv_core.c
> +++ b/hw/ppc/pnv_core.c
> @@ -151,7 +151,6 @@ static void pnv_core_realize(DeviceState *dev, Error 
> **errp)
>  PnvCore *pc = PNV_CORE(OBJECT(dev));
>  CPUCore *cc = CPU_CORE(OBJECT(dev));
>  const char *typename = pnv_core_cpu_typename(pc);
> -size_t size = object_type_get_instance_size(typename);
>  Error *local_err = NULL;
>  void *obj;
>  int i, j;
> @@ -165,11 +164,11 @@ static void pnv_core_realize(DeviceState *dev, Error 
> **errp)
>  return;
>  }
>  
> -pc->threads = g_malloc0(size * cc->nr_threads);
> +pc->threads = g_new(PowerPCCPU *, cc->nr_threads);
>  for (i = 0; i < cc->nr_threads; i++) {
> -obj = pc->threads + i * size;
> +obj = object_new(typename);
>  
> -object_initialize(obj, size, typename);
> +pc->threads[i] = POWERPC_CPU(obj);
>  
>  snprintf(name, sizeof(name), "thread[%d]", i);
>  object_property_add_child(OBJECT(pc), name, obj, &local_err);
> @@ -185,7 +184,7 @@ static void pnv_core_realize(DeviceState *dev, Error 
> **errp)
>  }
>  
>  for (j = 0; j < cc->nr_threads; j++) {
> -obj = pc->threads + j * size;
> +obj = OBJECT(pc->threads[j]);
>  
>  pnv_core_realize_child(obj, XICS_FABRIC(xi), &local_err);
>  if (local_err) {
> @@ -200,7 +199,7 @@ static void pnv_core_realize(DeviceState *dev, Error 
> **errp)
>  
>  err:
>  while (--i >= 0) {
> -obj = pc->threads + i * size;
> +obj = OBJECT(pc->threads[i]);
>  object_unparent(obj);
>  }
>  g_free(pc->threads);
> diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
> index e337af7a3a..447ae761f7 100644
> --- a/include/hw/ppc/pnv_core.h
> +++ b/include/hw/ppc/pnv_core.h
> @@ -34,7 +34,7 @@ typedef struct PnvCore {
>  CPUCore parent_obj;
>  
>  /*< public >*/
> -void *threads;
> +PowerPCCPU **threads;
>  uint32_t pir;
>  
>  MemoryRegion xscom_regs;




Re: [Qemu-devel] [PATCH 2/7] pnv: Add missing error check during cpu realize()

2018-06-13 Thread Cédric Le Goater
>> index 13ad7d9e04..efb68226bb 100644
>> --- a/hw/ppc/pnv_core.c
>> +++ b/hw/ppc/pnv_core.c
>> @@ -173,6 +173,9 @@ static void pnv_core_realize(DeviceState *dev, Error 
>> **errp)
>>  
>>  snprintf(name, sizeof(name), "thread[%d]", i);
>>  object_property_add_child(OBJECT(pc), name, obj, &local_err);
>> +if (local_err) {
>> +goto err;
>> +}
>>  object_property_add_alias(obj, "core-pir", OBJECT(pc),
>>"pir", &local_err);
>>  if (local_err) {
> 
> Hmm... the current error path seems to assume failures to be
> caused by object_property_add_child(). It hence unparents the
> previously parented CPUs, but not the current one. So we'll
> miss one call to object_unparent() if object_property_add_alias()
> fails.

yes, let's just put NULL or &error_abort instead.

C. 



Re: [Qemu-devel] [PATCH 4/7] pnv: Clean up cpu realize path

2018-06-13 Thread Greg Kurz
On Wed, 13 Jun 2018 16:57:04 +1000
David Gibson  wrote:

> pnv_cpu_init() is only called from the pnv cpu core realize path, and
> really only can be called from there.  So fold it into its caller, which
> we also rename for brevity.
> 
> Signed-off-by: David Gibson 
> ---

Reviewed-by: Greg Kurz 

>  hw/ppc/pnv_core.c | 56 ++-
>  1 file changed, 21 insertions(+), 35 deletions(-)
> 
> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> index 59309e149c..c9648fd1ad 100644
> --- a/hw/ppc/pnv_core.c
> +++ b/hw/ppc/pnv_core.c
> @@ -54,28 +54,6 @@ static void pnv_cpu_reset(void *opaque)
>  env->msr |= MSR_HVB; /* Hypervisor mode */
>  }
>  
> -static void pnv_cpu_init(PowerPCCPU *cpu, Error **errp)
> -{
> -CPUPPCState *env = &cpu->env;
> -int core_pir;
> -int thread_index = 0; /* TODO: TCG supports only one thread */
> -ppc_spr_t *pir = &env->spr_cb[SPR_PIR];
> -
> -core_pir = object_property_get_uint(OBJECT(cpu), "core-pir", 
> &error_abort);
> -
> -/*
> - * The PIR of a thread is the core PIR + the thread index. We will
> - * need to find a way to get the thread index when TCG supports
> - * more than 1. We could use the object name ?
> - */
> -pir->default_value = core_pir + thread_index;
> -
> -/* Set time-base frequency to 512 MHz */
> -cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
> -
> -qemu_register_reset(pnv_cpu_reset, cpu);
> -}
> -
>  /*
>   * These values are read by the PowerNV HW monitors under Linux
>   */
> @@ -121,29 +99,39 @@ static const MemoryRegionOps pnv_core_xscom_ops = {
>  .endianness = DEVICE_BIG_ENDIAN,
>  };
>  
> -static void pnv_core_realize_child(Object *child, XICSFabric *xi, Error 
> **errp)
> +static void pnv_realize_vcpu(PowerPCCPU *cpu, XICSFabric *xi, Error **errp)
>  {
> +CPUPPCState *env = &cpu->env;
> +int core_pir;
> +int thread_index = 0; /* TODO: TCG supports only one thread */
> +ppc_spr_t *pir = &env->spr_cb[SPR_PIR];
>  Error *local_err = NULL;
> -CPUState *cs = CPU(child);
> -PowerPCCPU *cpu = POWERPC_CPU(cs);
>  
> -object_property_set_bool(child, true, "realized", &local_err);
> +object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
>  if (local_err) {
>  error_propagate(errp, local_err);
>  return;
>  }
>  
> -cpu->intc = icp_create(child, TYPE_PNV_ICP, xi, &local_err);
> +cpu->intc = icp_create(OBJECT(cpu), TYPE_PNV_ICP, xi, &local_err);
>  if (local_err) {
>  error_propagate(errp, local_err);
>  return;
>  }
>  
> -pnv_cpu_init(cpu, &local_err);
> -if (local_err) {
> -error_propagate(errp, local_err);
> -return;
> -}
> +core_pir = object_property_get_uint(OBJECT(cpu), "core-pir", 
> &error_abort);
> +
> +/*
> + * The PIR of a thread is the core PIR + the thread index. We will
> + * need to find a way to get the thread index when TCG supports
> + * more than 1. We could use the object name ?
> + */
> +pir->default_value = core_pir + thread_index;
> +
> +/* Set time-base frequency to 512 MHz */
> +cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
> +
> +qemu_register_reset(pnv_cpu_reset, cpu);
>  }
>  
>  static void pnv_core_realize(DeviceState *dev, Error **errp)
> @@ -184,9 +172,7 @@ static void pnv_core_realize(DeviceState *dev, Error 
> **errp)
>  }
>  
>  for (j = 0; j < cc->nr_threads; j++) {
> -obj = OBJECT(pc->threads[j]);
> -
> -pnv_core_realize_child(obj, XICS_FABRIC(xi), &local_err);
> +pnv_realize_vcpu(pc->threads[j], XICS_FABRIC(xi), &local_err);
>  if (local_err) {
>  goto err;
>  }




Re: [Qemu-devel] [PATCH] nvme: Support image creation

2018-06-13 Thread Fam Zheng
On Wed, 06/13 10:06, Kevin Wolf wrote:
> Am 13.06.2018 um 09:46 hat Fam Zheng geschrieben:
> > Similar to the host_device's implementation, we check the requested
> > length against the namespace size.
> > 
> > Truncation is necessary to make qcow2 creation work.
> > 
> > Signed-off-by: Fam Zheng 
> 
> > +static int coroutine_fn nvme_co_create_opts(const char *filename, QemuOpts 
> > *opts,
> > +Error **errp)
> > +{
> > +int ret = 0;
> > +BlockDriverState *bs = NULL;
> > +int64_t size;
> > +
> > +if (strncmp(filename, "nvme://", strlen("nvme://"))) {
> > +error_setg(errp, "Invalid filename (must start with \"nvme://\")");
> > +ret = -EINVAL;
> > +goto out;
> > +}
> > +
> > +bs = bdrv_open(filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, 
> > errp);
> > +if (!bs) {
> > +ret = -EINVAL;
> > +goto out;
> > +}
> > +
> > +size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
> > +
> > +if (size < 0 || bdrv_getlength(bs) < size) {
> > +error_setg(errp, "Invalid image size");
> > +ret = -EINVAL;
> > +}
> > +
> > +out:
> > +bdrv_unref(bs);
> > +/* Hold breath for a little while before letting image format creation 
> > run.
> > + * The problem is when testing with Intel P3700, the controller doesn't
> > + * like the immediate open after close, as a result, nvme_init() will 
> > fail.
> > + * This works around that.
> > + **/
> > +g_usleep(200);
> 
> This suggests that nvme_init() is buggy.
> 
> If we need to sleep here (for two whole seconds?!), I'm sure there are
> other cases that would have to sleep as well. So even if we can't find a
> solution other than sleeping - which feels horribly wrong - the sleep
> should probably be in nvme_init() rather than here.
> 
> What kind of error are you running into without the sleep?

The error would be the "Timeout while waiting for device to start..." in
nvme_init(), which happens after waiting for 20 seconds after setting the
device's enable bit.

If we put a sleep in nvme_init() it will hurt the blockdev-add command and QEMU
launch badly, whereas being here it hurts x-blockdev-create, qemu-img create,
etc.  Both are really bad, but the first is worse.

BTW nvme_init() already has to spin for a few seconds waiting for bit 0 in this
loop:

while (!(le32_to_cpu(s->regs->csts) & 0x1)) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to start (%"
 PRId64 " ms)",
   timeout_ms);
ret = -ETIMEDOUT;
goto fail_queue;
}
}

(we should probably insert a g_usleep(100) in the loop body, but it doesn't make
nvme_init return any faster.)

My wild guess is that the controller doesn't respond to the setting of CC.EN
(device enable) bit correctly when it is still internally busy after a
previous reset in nvme_close(). But perhaps it is the cleanup in
nvme_close() which is lame in the first place, compared to the complex de-init
procedure we have in vfio_pci_reset(), and unbinding the device from Linux
nvme.ko coincidentally takes exactly 2 seconds when nvme_close() takes near 0.
What this suggests is that cleanly shutting down the device does take about two
seconds, but with the simplistic nvme_close(), the work is left asynchronously to
the controller or kernel.  I'll see if I can figure out what is missing.

Fam



Re: [Qemu-devel] [PATCH 5/7] pnv: Add cpu unrealize path

2018-06-13 Thread Greg Kurz
On Wed, 13 Jun 2018 16:57:05 +1000
David Gibson  wrote:

> Currently we don't have any unrealize path for pnv cpu cores.  We get away
> with this because we don't yet support cpu hotplug for pnv.
> 
> However, we're going to want it eventually, and in the meantime, it makes
> it non-obvious why there are a bunch of allocations on the realize() path
> that don't have matching frees.
> 
> So, implement the missing unrealize path.
> 
> Signed-off-by: David Gibson 
> ---

Reviewed-by: Greg Kurz 

>  hw/ppc/pnv_core.c | 21 +
>  1 file changed, 21 insertions(+)
> 
> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> index c9648fd1ad..c70dbbe056 100644
> --- a/hw/ppc/pnv_core.c
> +++ b/hw/ppc/pnv_core.c
> @@ -192,6 +192,26 @@ err:
>  error_propagate(errp, local_err);
>  }
>  
> +static void pnv_unrealize_vcpu(PowerPCCPU *cpu)
> +{
> +qemu_unregister_reset(pnv_cpu_reset, cpu);
> +object_unparent(cpu->intc);
> +cpu_remove_sync(CPU(cpu));
> +object_unparent(OBJECT(cpu));
> +}
> +
> +static void pnv_core_unrealize(DeviceState *dev, Error **errp)
> +{
> +PnvCore *pc = PNV_CORE(dev);
> +CPUCore *cc = CPU_CORE(dev);
> +int i;
> +
> +for (i = 0; i < cc->nr_threads; i++) {
> +pnv_unrealize_vcpu(pc->threads[i]);
> +}
> +g_free(pc->threads);
> +}
> +
>  static Property pnv_core_properties[] = {
>  DEFINE_PROP_UINT32("pir", PnvCore, pir, 0),
>  DEFINE_PROP_END_OF_LIST(),
> @@ -202,6 +222,7 @@ static void pnv_core_class_init(ObjectClass *oc, void 
> *data)
>  DeviceClass *dc = DEVICE_CLASS(oc);
>  
>  dc->realize = pnv_core_realize;
> +dc->unrealize = pnv_core_unrealize;
>  dc->props = pnv_core_properties;
>  }
>  




Re: [Qemu-devel] [PATCH v1 01/11] pc-dimm: remove leftover "struct pc_dimms_capacity"

2018-06-13 Thread Igor Mammedov
On Mon, 11 Jun 2018 14:16:45 +0200
David Hildenbrand  wrote:

> Not needed anymore, let's drop it.
> 
> Signed-off-by: David Hildenbrand 

Reviewed-by: Igor Mammedov 

> ---
>  hw/mem/pc-dimm.c | 5 -
>  1 file changed, 5 deletions(-)
> 
> diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
> index 12da89d562..62b34a992e 100644
> --- a/hw/mem/pc-dimm.c
> +++ b/hw/mem/pc-dimm.c
> @@ -27,11 +27,6 @@
>  #include "sysemu/numa.h"
>  #include "trace.h"
>  
> -typedef struct pc_dimms_capacity {
> - uint64_t size;
> - Error**errp;
> -} pc_dimms_capacity;
> -
>  void pc_dimm_memory_plug(DeviceState *dev, MachineState *machine,
>   uint64_t align, Error **errp)
>  {




Re: [Qemu-devel] Is there a way to package QEMU binaries?

2018-06-13 Thread Daniel P . Berrangé
On Wed, Jun 13, 2018 at 12:02:59PM +0800, Peter Xu wrote:
> On Tue, Jun 12, 2018 at 09:52:45AM +0100, Peter Maydell wrote:
> > On 12 June 2018 at 07:24, Peter Xu  wrote:
> > > For example, I wanted to compile QEMU once and install it on multiple
> > > systems.  What would be the suggested way to do so?
> > 
> > For this, I would recommend using whatever the packaging
> > format for those systems is. Eg for Debian use the existing
> > Debian QEMU packages, for Redhat systems use RPMs, etc.
> > If you want a newer version of QEMU than is in the distro's
> > packages, you can probably forward port the packaging parts
> > to a newer QEMU without too much pain.
> > 
> > Or you can use a distro-agnostic packaging tool of some sort;
> > there are a few out there but I have no particular recommendations.
> 
> I'll start my investigation with RPM first.  Thanks Peter.

If you're interested in Fedora, I maintain a Copr repository which
provides RPMs for every QEMU version since 1.4.0 and every libvirt
version since 1.2.0...

  https://copr.fedorainfracloud.org/coprs/berrange/virt-ark/

Yeah, Fedora 28 is missing, but I'll be adding it real soon.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|



Re: [Qemu-devel] [PATCH 1/7] spapr: Clean up cpu realize/unrealize paths

2018-06-13 Thread David Gibson
On Wed, Jun 13, 2018 at 10:11:45AM +0200, Cédric Le Goater wrote:
> On 06/13/2018 08:57 AM, David Gibson wrote:
> > spapr_cpu_init() and spapr_cpu_destroy() are only called from the spapr
> > cpu core realize/unrealize paths, and really can only be called from there.
> > 
> > Those are all short functions, so fold the pairs together for simplicity.
> > While we're there rename some functions and change some parameter types
> > for brevity and clarity.
> > 
> > Signed-off-by: David Gibson 
> 
> Reviewed-by: Cédric Le Goater 
> 
> Still a call to spapr_cpu_reset(cpu). We should try to get rid of it
> one day.

Yeah, I know.  I'm wrestling with that along with some of the pagesize
stuff.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 4/7] pnv: Clean up cpu realize path

2018-06-13 Thread David Gibson
On Wed, Jun 13, 2018 at 10:20:43AM +0200, Cédric Le Goater wrote:
> On 06/13/2018 08:57 AM, David Gibson wrote:
> > pnv_cpu_init() is only called from the pnv cpu core realize path, and
> > really only can be called from there.  So fold it into its caller, which
> > we also rename for brevity.
> > 
> > Signed-off-by: David Gibson 
> 
> I think we should set the default CPU settings (PIR) before creating
> the 'intc' object. I have cleanup for that in the pnv patchset. 
> Nevertheless,

Ok.

> Reviewed-by: Cédric Le Goater 
> 
> Thanks,
> 
> C.
> 
> > ---
> >  hw/ppc/pnv_core.c | 56 ++-
> >  1 file changed, 21 insertions(+), 35 deletions(-)
> > 
> > diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> > index 59309e149c..c9648fd1ad 100644
> > --- a/hw/ppc/pnv_core.c
> > +++ b/hw/ppc/pnv_core.c
> > @@ -54,28 +54,6 @@ static void pnv_cpu_reset(void *opaque)
> >  env->msr |= MSR_HVB; /* Hypervisor mode */
> >  }
> >  
> > -static void pnv_cpu_init(PowerPCCPU *cpu, Error **errp)
> > -{
> > -CPUPPCState *env = &cpu->env;
> > -int core_pir;
> > -int thread_index = 0; /* TODO: TCG supports only one thread */
> > -ppc_spr_t *pir = &env->spr_cb[SPR_PIR];
> > -
> > -core_pir = object_property_get_uint(OBJECT(cpu), "core-pir", 
> > &error_abort);
> > -
> > -/*
> > - * The PIR of a thread is the core PIR + the thread index. We will
> > - * need to find a way to get the thread index when TCG supports
> > - * more than 1. We could use the object name ?
> > - */
> > -pir->default_value = core_pir + thread_index;
> > -
> > -/* Set time-base frequency to 512 MHz */
> > -cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
> > -
> > -qemu_register_reset(pnv_cpu_reset, cpu);
> > -}
> > -
> >  /*
> >   * These values are read by the PowerNV HW monitors under Linux
> >   */
> > @@ -121,29 +99,39 @@ static const MemoryRegionOps pnv_core_xscom_ops = {
> >  .endianness = DEVICE_BIG_ENDIAN,
> >  };
> >  
> > -static void pnv_core_realize_child(Object *child, XICSFabric *xi, Error 
> > **errp)
> > +static void pnv_realize_vcpu(PowerPCCPU *cpu, XICSFabric *xi, Error **errp)
> >  {
> > +CPUPPCState *env = &cpu->env;
> > +int core_pir;
> > +int thread_index = 0; /* TODO: TCG supports only one thread */
> > +ppc_spr_t *pir = &env->spr_cb[SPR_PIR];
> >  Error *local_err = NULL;
> > -CPUState *cs = CPU(child);
> > -PowerPCCPU *cpu = POWERPC_CPU(cs);
> >  
> > -object_property_set_bool(child, true, "realized", &local_err);
> > +object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
> >  if (local_err) {
> >  error_propagate(errp, local_err);
> >  return;
> >  }
> >  
> > -cpu->intc = icp_create(child, TYPE_PNV_ICP, xi, &local_err);
> > +cpu->intc = icp_create(OBJECT(cpu), TYPE_PNV_ICP, xi, &local_err);
> >  if (local_err) {
> >  error_propagate(errp, local_err);
> >  return;
> >  }
> >  
> > -pnv_cpu_init(cpu, &local_err);
> > -if (local_err) {
> > -error_propagate(errp, local_err);
> > -return;
> > -}
> > +core_pir = object_property_get_uint(OBJECT(cpu), "core-pir", 
> > &error_abort);
> > +
> > +/*
> > + * The PIR of a thread is the core PIR + the thread index. We will
> > + * need to find a way to get the thread index when TCG supports
> > + * more than 1. We could use the object name ?
> > + */
> > +pir->default_value = core_pir + thread_index;
> > +
> > +/* Set time-base frequency to 512 MHz */
> > +cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
> > +
> > +qemu_register_reset(pnv_cpu_reset, cpu);
> >  }
> >  
> >  static void pnv_core_realize(DeviceState *dev, Error **errp)
> > @@ -184,9 +172,7 @@ static void pnv_core_realize(DeviceState *dev, Error 
> > **errp)
> >  }
> >  
> >  for (j = 0; j < cc->nr_threads; j++) {
> > -obj = OBJECT(pc->threads[j]);
> > -
> > -pnv_core_realize_child(obj, XICS_FABRIC(xi), &local_err);
> > +pnv_realize_vcpu(pc->threads[j], XICS_FABRIC(xi), &local_err);
> >  if (local_err) {
> >  goto err;
> >  }
> > 
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 2/7] pnv: Add missing error check during cpu realize()

2018-06-13 Thread David Gibson
On Wed, Jun 13, 2018 at 10:15:09AM +0200, Cédric Le Goater wrote:
> On 06/13/2018 08:57 AM, David Gibson wrote:
> > In pnv_core_realize() we call two functions with an Error * parameter in
> > succession, which means if they both cause errors we'll lose the first one.
> > Add an extra test/escape to fix this.
> 
> I tend now to pass just NULL or &error_abort to object_property_add_child() 
> and object_property_add_const_link(). These calls should just not
> fail.

Hm, good point. Another day.

> 
> Reviewed-by: Cédric Le Goater 
> 
> Thanks,
> 
> C. 
> > 
> > Signed-off-by: David Gibson 
> > ---
> >  hw/ppc/pnv_core.c | 3 +++
> >  1 file changed, 3 insertions(+)
> > 
> > diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> > index 13ad7d9e04..efb68226bb 100644
> > --- a/hw/ppc/pnv_core.c
> > +++ b/hw/ppc/pnv_core.c
> > @@ -173,6 +173,9 @@ static void pnv_core_realize(DeviceState *dev, Error 
> > **errp)
> >  
> >  snprintf(name, sizeof(name), "thread[%d]", i);
> >  object_property_add_child(OBJECT(pc), name, obj, &local_err);
> > +if (local_err) {
> > +goto err;
> > +}
> >  object_property_add_alias(obj, "core-pir", OBJECT(pc),
> >"pir", &local_err);
> >  if (local_err) {
> > 
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 0/3] mos6522: allow IRQs from external port pins

2018-06-13 Thread David Gibson
On Wed, Jun 13, 2018 at 09:30:12AM +0100, Mark Cave-Ayland wrote:
> Whilst testing a conversion of Laurent's q800 patchset over to use mos6522
> I discovered some issues which prevented IRQs being generated from inputs to
> external port pins.
> 
> This is a requirement for the q800 patchset which uses external clocks to
> generate periodic interrupts.
> 
> Signed-off-by: Mark Cave-Ayland 

Applied, thanks.

> 
> 
> Mark Cave-Ayland (3):
>   mos6522: only clear the shift register interrupt upon write
>   mos6522: remove additional interrupt flag filter from
> mos6522_update_irq()
>   mos6522: expose mos6522_update_irq() through MOS6522DeviceClass
> 
>  hw/misc/mos6522.c | 5 +++--
>  include/hw/misc/mos6522.h | 1 +
>  2 files changed, 4 insertions(+), 2 deletions(-)
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v4 2/2] intel-iommu: start to use error_report_once

2018-06-13 Thread Auger Eric
Hi,
On 06/13/2018 10:05 AM, Markus Armbruster wrote:
> Peter Xu  writes:
> 
>> Replace existing trace_vtd_err() with error_report_once() then stderr
>> will capture something if any of the error happens, meanwhile we don't
>> suffer from any DDOS.  Then remove the trace point.  While at it,
>> provide more information where proper (now we can pass parameters into
>> the report function).
>>
>> Reviewed-by: Philippe Mathieu-Daudé 
>> Signed-off-by: Peter Xu 
>> ---
>>  hw/i386/intel_iommu.c | 59 ---
>>  hw/i386/trace-events  |  1 -
>>  2 files changed, 33 insertions(+), 27 deletions(-)
> 
> Michael, would you give your Reviewed-by or Acked-by?  I'd take the
> series through my tree then.
> 
> [...]
> 
Sorry to enter this thread at this late stage. Just one question: on the
smmuv3 emulation code, Peter (Maydell) urged me to use
qemu_log_mask(LOG_GUEST_ERROR, ...) whenever the error was triggered by
bad guest behavior. So what is the final guidance to avoid the DOS you
mention?

Thanks

Eric



Re: [Qemu-devel] [RFC v1 1/1] virtio-crypto: Allow disabling of cipher algorithms for virtio-crypto device

2018-06-13 Thread Daniel P . Berrangé
On Tue, Jun 12, 2018 at 03:48:34PM -0400, Farhan Ali wrote:
> The virtio-crypto driver currently propagates to the guest
> all the cipher algorithms that the backend cryptodev can
> support. But in certain cases where the guest has more
> performant mechanism to handle some algorithms, it would be
> useful to propagate only a subset of the algorithms.

I'm not really convinced by this.

The performance of crypto algorithms has many influencing
factors, making it pretty hard to decide which is best
without actively testing specific impls and comparing
them in a manner which matches the application usage
pattern. eg in theory the kernel crypto impl of an alg
is faster than a userspace impl, if the kernel uses
hardware accel and userspace does not. This, however,
ignores the overhead of the kernel/userspace switch.
The real world performance winner, thus depends on the
amount of data being processed in each operation. Some
times userspace can win & sometimes kernel space can
win. This is even more relevant to virtio-crypto as
it has more expensive context switches.

IOW, when we expose a virtio-crypto dev to a guest,
it is never reasonable for the guest to blindly assume
that anything it does is faster than a pure software
impl running in the guest. It will depend on the usage
pattern. This is no different to bare metal where you
should not assume kernel crypto is faster.

IMHO this is not a compelling reason to be able to turn
off algorithms in virtio-crypto, as any decision will
always be at best incomplete & inaccurate.

> @@ -853,6 +863,34 @@ static const VMStateDescription vmstate_virtio_crypto = {
>  static Property virtio_crypto_properties[] = {
>  DEFINE_PROP_LINK("cryptodev", VirtIOCrypto, conf.cryptodev,
>   TYPE_CRYPTODEV_BACKEND, CryptoDevBackend *),
> +DEFINE_PROP_BIT("no-cipher", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_ARC4, false),
> +DEFINE_PROP_BIT("cipher-arc4", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_ARC4, false),
> +DEFINE_PROP_BIT("cipher-aes-ecb", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_AES_ECB, false),
> +DEFINE_PROP_BIT("cipher-aes-cbc", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_AES_CBC, false),
> +DEFINE_PROP_BIT("cipher-aes-ctr", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_AES_CTR, false),
> +DEFINE_PROP_BIT("cipher-des-ecb", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_DES_ECB, false),
> +DEFINE_PROP_BIT("cipher-3des-ecb", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_3DES_ECB, false),
> +DEFINE_PROP_BIT("cipher-3des-cbc", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_3DES_CBC, false),
> +DEFINE_PROP_BIT("cipher-3des-ctr", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_3DES_CTR, false),
> +DEFINE_PROP_BIT("cipher-kasumi-f8", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_KASUMI_F8, false),
> +DEFINE_PROP_BIT("cipher-snow3g-uea2", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_SNOW3G_UEA2, false),
> +DEFINE_PROP_BIT("cipher-aes-f8", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_AES_F8, false),
> +DEFINE_PROP_BIT("cipher-aes-xts", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_AES_XTS, false),
> +DEFINE_PROP_BIT("cipher-zuc-eea3", VirtIOCrypto, user_cipher_algo_l,
> +VIRTIO_CRYPTO_CIPHER_ZUC_EEA3, false),

This does not scale as an approach IMHO which just reinforces to me
that we shouldn't do this.

>  DEFINE_PROP_END_OF_LIST(),
>  };
>  
> @@ -974,6 +1012,8 @@ static void virtio_crypto_instance_init(Object *obj)
>   * Can be overriden with virtio_crypto_set_config_size.
>   */
>  vcrypto->config_size = sizeof(struct virtio_crypto_config);
> +vcrypto->user_cipher_algo_l = ~VIRTIO_CRYPTO_NO_CIPHER - 1;
> +vcrypto->user_cipher_algo_h = ~VIRTIO_CRYPTO_NO_CIPHER;
>  }
>  
>  static const TypeInfo virtio_crypto_info = {
> diff --git a/include/hw/virtio/virtio-crypto.h 
> b/include/hw/virtio/virtio-crypto.h
> index ca3a049..c5bb684 100644
> --- a/include/hw/virtio/virtio-crypto.h
> +++ b/include/hw/virtio/virtio-crypto.h
> @@ -97,6 +97,9 @@ typedef struct VirtIOCrypto {
>  uint32_t curr_queues;
>  size_t config_size;
>  uint8_t vhost_started;
> +
> +uint32_t user_cipher_algo_l;
> +uint32_t user_cipher_algo_h;
>  } VirtIOCrypto;
>  
>  #endif /* _QEMU_VIRTIO_CRYPTO_H */
> -- 
> 2.7.4
> 
> 

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-devel] [PATCH v1 02/11] nvdimm: no need to overwrite get_vmstate_memory_region()

2018-06-13 Thread Igor Mammedov
On Mon, 11 Jun 2018 14:16:46 +0200
David Hildenbrand  wrote:

> Our parent class (PC_DIMM) provides exactly the same function.
> 
> Signed-off-by: David Hildenbrand 

Reviewed-by: Igor Mammedov 

> ---
>  hw/mem/nvdimm.c | 6 --
>  1 file changed, 6 deletions(-)
> 
> diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
> index 4087aca25e..f974accbdd 100644
> --- a/hw/mem/nvdimm.c
> +++ b/hw/mem/nvdimm.c
> @@ -166,11 +166,6 @@ static void nvdimm_write_label_data(NVDIMMDevice 
> *nvdimm, const void *buf,
>  memory_region_set_dirty(mr, backend_offset, size);
>  }
>  
> -static MemoryRegion *nvdimm_get_vmstate_memory_region(PCDIMMDevice *dimm)
> -{
> -return host_memory_backend_get_memory(dimm->hostmem, &error_abort);
> -}
> -
>  static void nvdimm_class_init(ObjectClass *oc, void *data)
>  {
>  PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc);
> @@ -178,7 +173,6 @@ static void nvdimm_class_init(ObjectClass *oc, void *data)
>  
>  ddc->realize = nvdimm_realize;
>  ddc->get_memory_region = nvdimm_get_memory_region;
> -ddc->get_vmstate_memory_region = nvdimm_get_vmstate_memory_region;
>  
>  nvc->read_label_data = nvdimm_read_label_data;
>  nvc->write_label_data = nvdimm_write_label_data;




Re: [Qemu-devel] [PATCH] s390x/cpumodels: add z14 Model ZR1

2018-06-13 Thread Christian Borntraeger



On 06/13/2018 11:00 AM, David Hildenbrand wrote:
> On 13.06.2018 10:18, Christian Borntraeger wrote:
>> introduce the new z14 Model ZR1 cpu model. Mostly identical to z14, only
>> the cpu type differs (3906 vs. 3907)
>>
>> Signed-off-by: Christian Borntraeger 
>> ---
>>  target/s390x/cpu_models.c | 1 +
>>  1 file changed, 1 insertion(+)
>>
>> diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
>> index e10035aaa8..cfdbccf46d 100644
>> --- a/target/s390x/cpu_models.c
>> +++ b/target/s390x/cpu_models.c
>> @@ -79,6 +79,7 @@ static S390CPUDef s390_cpu_defs[] = {
>>  CPUDEF_INIT(0x2964, 13, 2, 47, 0x0800U, "z13.2", "IBM z13 GA2"),
>>  CPUDEF_INIT(0x2965, 13, 2, 47, 0x0800U, "z13s", "IBM z13s GA1"),
>>  CPUDEF_INIT(0x3906, 14, 1, 47, 0x0800U, "z14", "IBM z14 GA1"),
>> +CPUDEF_INIT(0x3907, 14, 1, 47, 0x0800U, "z14ZR1", "IBM z14 Model 
>> ZR1 GA1"),
>>  };
>>  
>>  #define QEMU_MAX_CPU_TYPE 0x2827
>>
> 
> This is the first time that we have two different EC variants (different
> CPU type) with the same HW gen and GA level.
> 
> Are they really completely equal (esp. the IBC value? that is used for
> model detection) apart from the CPU type?
> 
> check_unavailable_features()/arch_query_cpu_model_comparison() will
> treat both models as equal (meaning one can run on the other).
> 
> arch_query_cpu_model_baseline() might produce a z14ZR1-base when
> baselining e.g. a z14 and a z14ZR1 (don't think this is a problem), as
> s390_find_cpu_def() will always try to walk as far as possible in the
> cpu model definition table.
> 
Yes, we can run z14ZR1 on Z14 and vice versa.


s390_find_cpu_def has this

/* stop the search if we found the exact model */
if (def->type == type && def->ec_ga == ec_ga) {
return def;
}

So we should always get the best fit, no?




Re: [Qemu-devel] [PATCH] qga: check bytes count read by guest-file-read

2018-06-13 Thread Daniel P . Berrangé
On Wed, Jun 13, 2018 at 11:46:57AM +0530, P J P wrote:
> From: Prasad J Pandit 
> 
> While reading file content via 'guest-file-read' command,
> 'qmp_guest_file_read' routine allocates buffer of count+1
> bytes. It could overflow for large values of 'count'.
> Add check to avoid it.

No objection to this patch, but I would point out that even
trying to read  'UINT32_MAX - 1' bytes is going to end in
disaster. 

We'll allocate UINT32_MAX bytes of RAM to read the data.

Then we'll allocate

   (UINT32_MAX / 3 + 1) * 4 + 1

bytes of RAM in g_base64_encode which incidentally
is not checking for integer overflow either when calling
g_malloc.

Then our JSON formatting code will allocate at least that
much RAM again, probably also not checking for overflow.

I wouldn't be surprised if we allocate that much RAM yet
again in some other part of the stack too.

> 
> Reported-by: Fakhri Zulkifli 
> Signed-off-by: Prasad J Pandit 
> ---
>  qga/commands-posix.c | 2 +-
>  qga/commands-win32.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/qga/commands-posix.c b/qga/commands-posix.c
> index eae817191b..068c0f0bd9 100644
> --- a/qga/commands-posix.c
> +++ b/qga/commands-posix.c
> @@ -458,7 +458,7 @@ struct GuestFileRead *qmp_guest_file_read(int64_t handle, 
> bool has_count,
>  
>  if (!has_count) {
>  count = QGA_READ_COUNT_DEFAULT;
> -} else if (count < 0) {
> +} else if (count < 0 || count >= UINT32_MAX) {
>  error_setg(errp, "value '%" PRId64 "' is invalid for argument count",
> count);
>  return NULL;
> diff --git a/qga/commands-win32.c b/qga/commands-win32.c
> index 70ee5379f6..73f31fa8c2 100644
> --- a/qga/commands-win32.c
> +++ b/qga/commands-win32.c
> @@ -318,7 +318,7 @@ GuestFileRead *qmp_guest_file_read(int64_t handle, bool 
> has_count,
>  }
>  if (!has_count) {
>  count = QGA_READ_COUNT_DEFAULT;
> -} else if (count < 0) {
> +} else if (count < 0 || count >= UINT32_MAX) {
>  error_setg(errp, "value '%" PRId64
> "' is invalid for argument count", count);
>  return NULL;
> -- 
> 2.17.1
> 
> 

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|



Re: [Qemu-devel] [PATCH 2/7] pnv: Add missing error check during cpu realize()

2018-06-13 Thread Greg Kurz
On Wed, 13 Jun 2018 11:14:57 +0200
Cédric Le Goater  wrote:

> >> index 13ad7d9e04..efb68226bb 100644
> >> --- a/hw/ppc/pnv_core.c
> >> +++ b/hw/ppc/pnv_core.c
> >> @@ -173,6 +173,9 @@ static void pnv_core_realize(DeviceState *dev, Error 
> >> **errp)
> >>  
> >>  snprintf(name, sizeof(name), "thread[%d]", i);
> >>  object_property_add_child(OBJECT(pc), name, obj, &local_err);
> >> +if (local_err) {
> >> +goto err;
> >> +}
> >>  object_property_add_alias(obj, "core-pir", OBJECT(pc),
> >>"pir", &local_err);
> >>  if (local_err) {  
> > 
> > Hmm... the current error path seems to assume failures to be
> > caused by object_property_add_child(). It hence unparents the
> > previously parented CPUs, but not the current one. So we'll
> > miss one call to object_unparent() if object_property_add_alias()
> > fails.  
> 
> yes, let's just put NULL or &error_abort instead.
> 

NULL means we really don't care if the call fails or succeeds.

&error_abort means we consider a failure to be an unrecoverable bug.

So I would rather pass &error_abort here.

But if the guest is already running and functional, and we hit
the error during hotplug, does the guest really deserve to be
aborted or should we just fail the hotplug ?

> C. 




Re: [Qemu-devel] [PATCH 6/7] target/ppc: Replace intc pointer with a general machine_data pointer

2018-06-13 Thread David Gibson
On Wed, Jun 13, 2018 at 10:46:02AM +0200, Cédric Le Goater wrote:
> On 06/13/2018 08:57 AM, David Gibson wrote:
> > PowerPCCPU contains an (Object *)intc used to point to the cpu's interrupt
> > controller. Or more precisely to the "presentation" component of the
> > interrupt controller relevant to this cpu.
> 
> yes and that made sense in terms of modeling because you actually have a 
> set of wires between the presenter and the cores of a system.
> 
> > Really, this field is machine specific.  The machines which use it can
> > point it to different types of object depending on their needs, and most
> > machines don't use it at all (since they have older style PICs which don't
> > have per-cpu presentation components).
> > 
> > There's also other information that's per-cpu, but platform/machine
> > specific.  So replace the intc pointer with a (void *)machine_data which
> > can be managed as the machine type likes to conveniently store per cpu
> > information.
> 
> ah. so you have something else to store in the machine_data. 
> 
> If you were defining a type, we would have some more checks when 
> casting the machine_data field. We also could parent the object 
> to the CPU also. This is minor.

My intention is that machine_data be a "passive" structure, not a QOM
object.  Lifetime and type management are all up to the machine.

> The change should be compatible with the XIVE change which need 
> to allocate a different type of presenter. So, sPAPRCPUState and 
> PnvCPUState would look like :
> 
>   typedef struct sPAPRCPUState {
>   ICPState *icp;
>   XiveTCTX *tctx;
>   } sPAPRCPUState;

Exactly.

> and the call to icp_create() will move in an operation of the 
> sPAPR IRQ backend, if that exists one day, and in an operation of 
> the PnvChip to handle the differences in the interrupt controller
> in use by the machine. 
> 
> So no big difference, but the cpu machine_data won't be populated
> from the core but from the machine. I hope this is compatible
> with the next changes.

intc was already populated from the machine.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 2/7] pnv: Add missing error check during cpu realize()

2018-06-13 Thread David Gibson
On Wed, Jun 13, 2018 at 11:14:57AM +0200, Cédric Le Goater wrote:
> >> index 13ad7d9e04..efb68226bb 100644
> >> --- a/hw/ppc/pnv_core.c
> >> +++ b/hw/ppc/pnv_core.c
> >> @@ -173,6 +173,9 @@ static void pnv_core_realize(DeviceState *dev, Error 
> >> **errp)
> >>  
> >>  snprintf(name, sizeof(name), "thread[%d]", i);
> >>  object_property_add_child(OBJECT(pc), name, obj, &local_err);
> >> +if (local_err) {
> >> +goto err;
> >> +}
> >>  object_property_add_alias(obj, "core-pir", OBJECT(pc),
> >>"pir", &local_err);
> >>  if (local_err) {
> > 
> > Hmm... the current error path seems to assume failures to be
> > caused by object_property_add_child(). It hence unparents the
> > previously parented CPUs, but not the current one. So we'll
> > miss one call to object_unparent() if object_property_add_alias()
> > fails.
> 
> yes, let's just put NULL or &error_abort instead.

Yeah, good idea, I'll change it in a new spin.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[Qemu-devel] [PATCH 1/2] hw/mips/jazz: create ESP device directly via qdev

2018-06-13 Thread Mark Cave-Ayland
MIPS jazz is the last user of the legacy esp_init() function so move creation
of the ESP device over to use qdev.

Note that the esp_reset and dma_enable qemu_irqs are currently unused and so
we do not wire these up and instead remove the variables to prevent the
compiler emitting unused variable warnings.

Signed-off-by: Mark Cave-Ayland 
---
 hw/mips/mips_jazz.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/hw/mips/mips_jazz.c b/hw/mips/mips_jazz.c
index 90cb306f53..1afbe3ce6a 100644
--- a/hw/mips/mips_jazz.c
+++ b/hw/mips/mips_jazz.c
@@ -145,10 +145,10 @@ static void mips_jazz_init(MachineState *machine,
 ISABus *isa_bus;
 ISADevice *pit;
 DriveInfo *fds[MAX_FD];
-qemu_irq esp_reset, dma_enable;
 MemoryRegion *ram = g_new(MemoryRegion, 1);
 MemoryRegion *bios = g_new(MemoryRegion, 1);
 MemoryRegion *bios2 = g_new(MemoryRegion, 1);
+SysBusESPState *sysbus_esp;
 ESPState *esp;
 
 /* init CPUs */
@@ -281,8 +281,21 @@ static void mips_jazz_init(MachineState *machine,
 }
 
 /* SCSI adapter */
-esp = esp_init(0x80002000, 0, rc4030_dma_read, rc4030_dma_write, dmas[0],
-   qdev_get_gpio_in(rc4030, 5), &esp_reset, &dma_enable);
+dev = qdev_create(NULL, TYPE_ESP);
+sysbus_esp = ESP_STATE(dev);
+esp = &sysbus_esp->esp;
+esp->dma_memory_read = rc4030_dma_read;
+esp->dma_memory_write = rc4030_dma_write;
+esp->dma_opaque = dmas[0];
+sysbus_esp->it_shift = 0;
+/* XXX for now until rc4030 has been changed to use DMA enable signal */
+esp->dma_enabled = 1;
+qdev_init_nofail(dev);
+
+sysbus = SYS_BUS_DEVICE(dev);
+sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(rc4030, 5));
+sysbus_mmio_map(sysbus, 0, 0x80002000);
+
 scsi_bus_legacy_handle_cmdline(&esp->bus);
 
 /* Floppy */
-- 
2.11.0




[Qemu-devel] [PATCH 0/2] scsi: remove legacy esp_init() function

2018-06-13 Thread Mark Cave-Ayland
Something else that came out of reviewing Laurent's q800 patchset: after my
SPARC cleanups last year, MIPS Jazz is the last remaining user of the legacy
esp_init() function.

Patch 1 switches mips_jazz_init() over to create the ESP device directly
via qdev. Please note that I do not have any MIPS jazz images and so this
is compile-tested only (although passes "make check") and needs an ACK from
someone (Hervé?).

Now that the last user of esp_init() is gone, patch 2 removes the legacy
function and its associated header.

Signed-off-by: Mark Cave-Ayland 


Mark Cave-Ayland (2):
  hw/mips/jazz: create ESP device directly via qdev
  esp: remove legacy esp_init() function

 hw/mips/mips_jazz.c   | 19 ---
 hw/scsi/esp.c | 30 --
 include/hw/scsi/esp.h |  5 -
 3 files changed, 16 insertions(+), 38 deletions(-)

-- 
2.11.0




[Qemu-devel] [PATCH 2/2] esp: remove legacy esp_init() function

2018-06-13 Thread Mark Cave-Ayland
Remove the legacy esp_init() function now that there are no more remaining
users.

Signed-off-by: Mark Cave-Ayland 
---
 hw/scsi/esp.c | 30 --
 include/hw/scsi/esp.h |  5 -
 2 files changed, 35 deletions(-)

diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c
index 9ed9727744..630d923623 100644
--- a/hw/scsi/esp.c
+++ b/hw/scsi/esp.c
@@ -619,36 +619,6 @@ static const MemoryRegionOps sysbus_esp_mem_ops = {
 .valid.accepts = esp_mem_accepts,
 };
 
-ESPState *esp_init(hwaddr espaddr, int it_shift,
-   ESPDMAMemoryReadWriteFunc dma_memory_read,
-   ESPDMAMemoryReadWriteFunc dma_memory_write,
-   void *dma_opaque, qemu_irq irq, qemu_irq *reset,
-   qemu_irq *dma_enable)
-{
-DeviceState *dev;
-SysBusDevice *s;
-SysBusESPState *sysbus;
-ESPState *esp;
-
-dev = qdev_create(NULL, TYPE_ESP);
-sysbus = ESP_STATE(dev);
-esp = &sysbus->esp;
-esp->dma_memory_read = dma_memory_read;
-esp->dma_memory_write = dma_memory_write;
-esp->dma_opaque = dma_opaque;
-sysbus->it_shift = it_shift;
-/* XXX for now until rc4030 has been changed to use DMA enable signal */
-esp->dma_enabled = 1;
-qdev_init_nofail(dev);
-s = SYS_BUS_DEVICE(dev);
-sysbus_connect_irq(s, 0, irq);
-sysbus_mmio_map(s, 0, espaddr);
-*reset = qdev_get_gpio_in(dev, 0);
-*dma_enable = qdev_get_gpio_in(dev, 1);
-
-return esp;
-}
-
 static const struct SCSIBusInfo esp_scsi_info = {
 .tcq = false,
 .max_target = ESP_MAX_DEVS,
diff --git a/include/hw/scsi/esp.h b/include/hw/scsi/esp.h
index 93fdaced67..682a0d2de0 100644
--- a/include/hw/scsi/esp.h
+++ b/include/hw/scsi/esp.h
@@ -131,11 +131,6 @@ typedef struct {
 #define TCHI_FAS100A 0x4
 #define TCHI_AM53C974 0x12
 
-ESPState *esp_init(hwaddr espaddr, int it_shift,
-   ESPDMAMemoryReadWriteFunc dma_memory_read,
-   ESPDMAMemoryReadWriteFunc dma_memory_write,
-   void *dma_opaque, qemu_irq irq, qemu_irq *reset,
-   qemu_irq *dma_enable);
 void esp_dma_enable(ESPState *s, int irq, int level);
 void esp_request_cancelled(SCSIRequest *req);
 void esp_command_complete(SCSIRequest *req, uint32_t status, size_t resid);
-- 
2.11.0




Re: [Qemu-devel] [PATCH v4 0/3] ramfb: simple boot framebuffer

2018-06-13 Thread no-reply
Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20180613084149.14523-1-kra...@redhat.com
Subject: [Qemu-devel] [PATCH v4 0/3] ramfb: simple boot framebuffer

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag]   
patchew/1528879723-24675-1-git-send-email-eric.au...@redhat.com -> 
patchew/1528879723-24675-1-git-send-email-eric.au...@redhat.com
 t [tag update]
patchew/20180612221923.24469-1-mdr...@linux.vnet.ibm.com -> 
patchew/20180612221923.24469-1-mdr...@linux.vnet.ibm.com
 t [tag update]
patchew/20180613065707.30766-1-da...@gibson.dropbear.id.au -> 
patchew/20180613065707.30766-1-da...@gibson.dropbear.id.au
 * [new tag]   patchew/20180613084149.14523-1-kra...@redhat.com -> 
patchew/20180613084149.14523-1-kra...@redhat.com
Switched to a new branch 'test'
a3f6289703 hw/vfio/display: add ramfb support
e928364fde hw/display: add standalone ramfb device
271494a174 hw/display: add ramfb, a simple boot framebuffer living in guest ram

=== OUTPUT BEGIN ===
Checking PATCH 1/3: hw/display: add ramfb, a simple boot framebuffer living in 
guest ram...
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#41: 
new file mode 100644

total: 0 errors, 1 warnings, 109 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
Checking PATCH 2/3: hw/display: add standalone ramfb device...
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#71: 
new file mode 100644

total: 0 errors, 1 warnings, 141 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
Checking PATCH 3/3: hw/vfio/display: add ramfb support...
ERROR: braces {} are necessary for all arms of this statement
#31: FILE: hw/vfio/display.c:187:
+if (strcmp(object_get_typename(OBJECT(vdev)), "vfio-pci-ramfb") == 0)
[...]

ERROR: braces {} are necessary for all arms of this statement
#50: FILE: hw/vfio/display.c:311:
+if (strcmp(object_get_typename(OBJECT(vdev)), "vfio-pci-ramfb") == 0)
[...]

total: 2 errors, 0 warnings, 72 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [Qemu-devel] [PATCH] exec: Fix MAP_RAM for cached access

2018-06-13 Thread Paolo Bonzini
On 12/06/2018 21:05, Eric Auger wrote:
> When an IOMMUMemoryRegion is in front of a virtio device,
> address_space_cache_init does not set cache->ptr as the memory
> region is not RAM. However when the device performs an access,
> we end up in glue() which performs the translation and then uses
> MAP_RAM. This latter uses the unset ptr and returns a wrong value
> which leads to a SIGSEGV in address_space_lduw_internal_cached_slow,
> for instance. Let's test whether the cache->ptr is set, and in
> the negative use the old macro definition. This fixes the
> use cases featuring vIOMMU (Intel and ARM SMMU) which lead to
> a SIGSEGV.
> 
> Fixes: 48564041a73a (exec: reintroduce MemoryRegion caching)
> Signed-off-by: Eric Auger 
> 
> ---
> 
> I am not sure whether it doesn't break any targeted optimization
> but at least it removes the SIGSEGV.

Actually cache->ptr is always NULL here, since this is the slow path
(there is even an assertion in address_space_translate_cached); so
MAP_RAM can be even simpler and, apart from the bugfix, I think we
should remove all of IS_DIRECT, MAP_RAM and INVALIDATE as a follow-up.
They were needed in the original implementation of MemoryRegionCache,
which only worked with RAM regions but not anymore now that the RAM case
is open-coded in include/exec/memory_ldst_cached.inc.h.

Thanks,

Paolo

> Signed-off-by: Eric Auger 
> ---
>  exec.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/exec.c b/exec.c
> index f6645ed..46fbd25 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -3800,7 +3800,9 @@ address_space_write_cached_slow(MemoryRegionCache 
> *cache, hwaddr addr,
>  #define SUFFIX   _cached_slow
>  #define TRANSLATE(...)   address_space_translate_cached(cache, 
> __VA_ARGS__)
>  #define IS_DIRECT(mr, is_write)  memory_access_is_direct(mr, is_write)
> -#define MAP_RAM(mr, ofs) (cache->ptr + (ofs - cache->xlat))
> +#define MAP_RAM(mr, ofs) (cache->ptr ? \
> + (cache->ptr + (ofs - cache->xlat)) :  \
> + qemu_map_ram_ptr((mr)->ram_block, ofs))
>  #define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len)
>  #define RCU_READ_LOCK()  ((void)0)
>  #define RCU_READ_UNLOCK()((void)0)
> 




Re: [Qemu-devel] [PATCH] s390x/cpumodels: add z14 Model ZR1

2018-06-13 Thread Christian Borntraeger



On 06/13/2018 11:00 AM, David Hildenbrand wrote:
> On 13.06.2018 10:18, Christian Borntraeger wrote:
>> introduce the new z14 Model ZR1 cpu model. Mostly identical to z14, only
>> the cpu type differs (3906 vs. 3907)
>>
>> Signed-off-by: Christian Borntraeger 
>> ---
>>  target/s390x/cpu_models.c | 1 +
>>  1 file changed, 1 insertion(+)
>>
>> diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
>> index e10035aaa8..cfdbccf46d 100644
>> --- a/target/s390x/cpu_models.c
>> +++ b/target/s390x/cpu_models.c
>> @@ -79,6 +79,7 @@ static S390CPUDef s390_cpu_defs[] = {
>>  CPUDEF_INIT(0x2964, 13, 2, 47, 0x0800U, "z13.2", "IBM z13 GA2"),
>>  CPUDEF_INIT(0x2965, 13, 2, 47, 0x0800U, "z13s", "IBM z13s GA1"),
>>  CPUDEF_INIT(0x3906, 14, 1, 47, 0x0800U, "z14", "IBM z14 GA1"),
>> +CPUDEF_INIT(0x3907, 14, 1, 47, 0x0800U, "z14ZR1", "IBM z14 Model 
>> ZR1 GA1"),
>>  };
>>  
>>  #define QEMU_MAX_CPU_TYPE 0x2827
>>
> 
> This is the first time that we have two different EC variants (different
> CPU type) with the same HW gen and GA level.
> 
> Are they really completely equal (esp. the IBC value? that is used for
> model detection) apart from the CPU type?

Yes, the IBC is the same.




Re: [Qemu-devel] [PATCH 2/7] pnv: Add missing error check during cpu realize()

2018-06-13 Thread David Gibson
On Wed, Jun 13, 2018 at 11:42:07AM +0200, Greg Kurz wrote:
> On Wed, 13 Jun 2018 11:14:57 +0200
> Cédric Le Goater  wrote:
> 
> > >> index 13ad7d9e04..efb68226bb 100644
> > >> --- a/hw/ppc/pnv_core.c
> > >> +++ b/hw/ppc/pnv_core.c
> > >> @@ -173,6 +173,9 @@ static void pnv_core_realize(DeviceState *dev, Error 
> > >> **errp)
> > >>  
> > >>  snprintf(name, sizeof(name), "thread[%d]", i);
> > >>  object_property_add_child(OBJECT(pc), name, obj, &local_err);
> > >> +if (local_err) {
> > >> +goto err;
> > >> +}
> > >>  object_property_add_alias(obj, "core-pir", OBJECT(pc),
> > >>"pir", &local_err);
> > >>  if (local_err) {  
> > > 
> > > Hmm... the current error path seems to assume failures to be
> > > caused by object_property_add_child(). It hence unparents the
> > > previously parented CPUs, but not the current one. So we'll
> > > miss one call to object_unparent() if object_property_add_alias()
> > > fails.  
> > 
> > yes, let's just put NULL or &error_abort instead.
> > 
> 
> NULL means we really don't care if the call fails or succeeds.
> 
> &error_abort means we consider a failure to be an unrecoverable bug.
> 
> So I would rather pass &error_abort here.
> 
> But if the guest is already running and functional, and we hit
> the error during hotplug, does the guest really deserve to be
> aborted or should we just fail the hotplug ?

Ah, dammit, that's why it wasn't an abort in the first place.  Yeah,
we'd better propagate the errors.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v2 1/2] memfd: fix possible usage of the uninitialized file descriptor

2018-06-13 Thread Marc-André Lureau
On Wed, Jun 13, 2018 at 10:19 AM, Dima Stepanov  wrote:
> The qemu_memfd_alloc_check() routine allocates the fd variable on stack.
> This variable is initialized inside the qemu_memfd_alloc() function.
> There are several cases when *fd will be left uninitialized which can
> lead to the unexpected close() in the qemu_memfd_free() call.
>
> Set file descriptor to -1 before calling the qemu_memfd_alloc routine.
>
> Signed-off-by: Dima Stepanov 

Reviewed-by: Marc-André Lureau 

> ---
>  util/memfd.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/util/memfd.c b/util/memfd.c
> index d248a53..6287946 100644
> --- a/util/memfd.c
> +++ b/util/memfd.c
> @@ -187,6 +187,7 @@ bool qemu_memfd_alloc_check(void)
>  int fd;
>  void *ptr;
>
> +fd = -1;
>  ptr = qemu_memfd_alloc("test", 4096, 0, &fd, NULL);
>  memfd_check = ptr ? MEMFD_OK : MEMFD_KO;
>  qemu_memfd_free(ptr, 4096, fd);
> --
> 2.7.4
>
>



-- 
Marc-André Lureau



Re: [Qemu-devel] [PATCH v2 3/8] ppc4xx_i2c: Implement directcntl register

2018-06-13 Thread David Gibson
On Wed, Jun 13, 2018 at 10:54:22AM +0200, BALATON Zoltan wrote:
> On Wed, 13 Jun 2018, David Gibson wrote:
> > On Wed, Jun 06, 2018 at 03:31:48PM +0200, BALATON Zoltan wrote:
> > > Signed-off-by: BALATON Zoltan 
> > > ---
> > >  default-configs/ppc-softmmu.mak|  1 +
> > >  default-configs/ppcemb-softmmu.mak |  1 +
> > >  hw/i2c/ppc4xx_i2c.c| 14 +-
> > >  3 files changed, 15 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/default-configs/ppc-softmmu.mak 
> > > b/default-configs/ppc-softmmu.mak
> > > index 4d7be45..7d0dc2f 100644
> > > --- a/default-configs/ppc-softmmu.mak
> > > +++ b/default-configs/ppc-softmmu.mak
> > > @@ -26,6 +26,7 @@ CONFIG_USB_EHCI_SYSBUS=y
> > >  CONFIG_SM501=y
> > >  CONFIG_IDE_SII3112=y
> > >  CONFIG_I2C=y
> > > +CONFIG_BITBANG_I2C=y
> > > 
> > >  # For Macs
> > >  CONFIG_MAC=y
> > > diff --git a/default-configs/ppcemb-softmmu.mak 
> > > b/default-configs/ppcemb-softmmu.mak
> > > index 67d18b2..37af193 100644
> > > --- a/default-configs/ppcemb-softmmu.mak
> > > +++ b/default-configs/ppcemb-softmmu.mak
> > > @@ -19,3 +19,4 @@ CONFIG_USB_EHCI_SYSBUS=y
> > >  CONFIG_SM501=y
> > >  CONFIG_IDE_SII3112=y
> > >  CONFIG_I2C=y
> > > +CONFIG_BITBANG_I2C=y
> > > diff --git a/hw/i2c/ppc4xx_i2c.c b/hw/i2c/ppc4xx_i2c.c
> > > index a68b5f7..5806209 100644
> > > --- a/hw/i2c/ppc4xx_i2c.c
> > > +++ b/hw/i2c/ppc4xx_i2c.c
> > > @@ -30,6 +30,7 @@
> > >  #include "cpu.h"
> > >  #include "hw/hw.h"
> > >  #include "hw/i2c/ppc4xx_i2c.h"
> > > +#include "bitbang_i2c.h"
> > > 
> > >  #define PPC4xx_I2C_MEM_SIZE 18
> > > 
> > > @@ -46,7 +47,13 @@
> > > 
> > >  #define IIC_XTCNTLSS_SRST   (1 << 0)
> > > 
> > > +#define IIC_DIRECTCNTL_SDAC (1 << 3)
> > > +#define IIC_DIRECTCNTL_SCLC (1 << 2)
> > > +#define IIC_DIRECTCNTL_MSDA (1 << 1)
> > > +#define IIC_DIRECTCNTL_MSCL (1 << 0)
> > > +
> > >  typedef struct {
> > > +bitbang_i2c_interface *bitbang;
> > >  uint8_t mdata;
> > >  uint8_t lmadr;
> > >  uint8_t hmadr;
> > > @@ -308,7 +315,11 @@ static void ppc4xx_i2c_writeb(void *opaque, hwaddr 
> > > addr, uint64_t value,
> > >  i2c->xtcntlss = value;
> > >  break;
> > >  case 16:
> > > -i2c->directcntl = value & 0x7;
> > > +i2c->directcntl = value & (IIC_DIRECTCNTL_SDAC & 
> > > IIC_DIRECTCNTL_SCLC);
> > > +i2c->directcntl |= (value & IIC_DIRECTCNTL_SCLC ? 1 : 0);
> > > +bitbang_i2c_set(i2c->bitbang, BITBANG_I2C_SCL, i2c->directcntl & 
> > > 1);
> > 
> > Shouldn't that use i2c->directcntl & IIC_DIRECTCNTL_MSCL ?
> > 
> > > +i2c->directcntl |= bitbang_i2c_set(i2c->bitbang, BITBANG_I2C_SDA,
> > > +   (value & IIC_DIRECTCNTL_SDAC) != 0) << 1;
> > 
> > Last expression might be clearer as:
> > value & IIC_DIRECTCNTL_SDAC ? IIC_DIRECTCNTL_MSDA : 0
> 
> I guess this is a matter of taste but to me IIC_DIRECTCNTL_MSDA is a bit
> position in the register so I use that when accessing that bit but when I
> check for the values of a bit being 0 or 1 I don't use the define which is
> for something else, just happens to have value 1 as well.

Hmm.. but the bit is being stored in i2c->directcntl, which means it
can be read back from the register in that position, no?

> If this does not explain your question and you think it's better to use
> defines here I can change that in next version, please let me know.
> 
> Regards,
> BALATON Zoltan
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [Qemu-ppc] [PATCH v2 5/8] hw/timer: Add basic M41T80 emulation

2018-06-13 Thread David Gibson
On Wed, Jun 13, 2018 at 10:50:59AM +0200, BALATON Zoltan wrote:
> On Wed, 13 Jun 2018, David Gibson wrote:
> > On Wed, Jun 06, 2018 at 07:35:28PM +0200, BALATON Zoltan wrote:
> > > On Wed, 6 Jun 2018, Philippe Mathieu-Daudé wrote:
> > > > On 06/06/2018 10:31 AM, BALATON Zoltan wrote:
> > > > > Basic emulation of the M41T80 serial (I2C) RTC chip. Only getting time
> > > > > of day is implemented. Setting time and RTC alarm are not supported.
> > > [...]
> > > > > diff --git a/hw/timer/m41t80.c b/hw/timer/m41t80.c
> > > > > new file mode 100644
> > > > > index 000..9dbdb1b
> > > > > --- /dev/null
> > > > > +++ b/hw/timer/m41t80.c
> > > > > @@ -0,0 +1,117 @@
> > > > > +/*
> > > > > + * M41T80 serial rtc emulation
> > > > > + *
> > > > > + * Copyright (c) 2018 BALATON Zoltan
> > > > > + *
> > > > > + * This work is licensed under the GNU GPL license version 2 or 
> > > > > later.
> > > > > + *
> > > > > + */
> > > > > +
> > > > > +#include "qemu/osdep.h"
> > > > > +#include "qemu/log.h"
> > > > > +#include "qemu/timer.h"
> > > > > +#include "qemu/bcd.h"
> > > > > +#include "hw/i2c/i2c.h"
> > > > > +
> > > > > +#define TYPE_M41T80 "m41t80"
> > > > > +#define M41T80(obj) OBJECT_CHECK(M41t80State, (obj), TYPE_M41T80)
> > > > > +
> > > > > +typedef struct M41t80State {
> > > > > +I2CSlave parent_obj;
> > > > > +int8_t addr;
> > > > > +} M41t80State;
> > > > > +
> > > > > +static void m41t80_realize(DeviceState *dev, Error **errp)
> > > > > +{
> > > > > +M41t80State *s = M41T80(dev);
> > > > > +
> > > > > +s->addr = -1;
> > > > > +}
> > > > > +
> > > > > +static int m41t80_send(I2CSlave *i2c, uint8_t data)
> > > > > +{
> > > > > +M41t80State *s = M41T80(i2c);
> > > > > +
> > > > > +if (s->addr < 0) {
> > > > > +s->addr = data;
> > > > > +} else {
> > > > > +s->addr++;
> > > > > +}
> > > > 
> > > > What about adding enum i2c_event in M41t80State and use the enum here
> > > > rather than the addr < 0? Also this wraps at INT8_MAX = 127, is this
> > > > expected?
> > > 
> > > Thanks for the review. I guess we could add enum for device bytes and the
> > > special case -1 meaning no register address selected yet but this is a 
> > > very
> > > simple device with only 20 bytes and the datasheet also lists them by 
> > > number
> > > without naming them so I think we can also refer to them by number. Since
> > > the device has only this 20 bytes the case with 127 should also not be a
> > > problem as that's invalid address anyway. Or did you mean something else?
> > 
> > So, I'm not particularly in favour of adding extra state variables.
> > 
> > But is using addr < 0 safe here?  You're assigning the uint8_t data to
> > addr - could that result in a negative value?
> 
> Why wouldn't it be safe with the expected values for register address
> between 0-19? If the guest sends garbage values over 127 it will either
> result in invalid register or unselected register and lead to an error when
> trying to read/write that register so I don't see what other problem this
> may cause.

Ok, but where is that enforced?

> The addr < 0 is to check if no address was selected before (on creating the
> device and when sending first value from host addr is set to -1. In this
> case first write will set register address, then subsequent reads/writes
> increment register address as the datasheet says).
> 
> Regards,
> BALATON Zoltan


-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v2 2/8] ppc4xx_i2c: Move register state to private struct and remove unimplemented sdata and intr registers

2018-06-13 Thread David Gibson
On Wed, Jun 13, 2018 at 10:56:59AM +0200, BALATON Zoltan wrote:
> On Wed, 13 Jun 2018, David Gibson wrote:
> > On Fri, Jun 08, 2018 at 11:20:50AM +0200, BALATON Zoltan wrote:
> > > On Fri, 8 Jun 2018, David Gibson wrote:
> > > > On Wed, Jun 06, 2018 at 03:31:48PM +0200, BALATON Zoltan wrote:
> > > > > Signed-off-by: BALATON Zoltan 
> > > > 
> > > > It's not clear to me why this is preferable to having the registers
> > > > embedded in the state structure.  The latter is pretty standard
> > > > practice for qemu.
> > > 
> > > Maybe it will be clearer after the next patch in the series. I needed a
> > > place to store the bitbang_i2c_interface for the directcntl way of 
> > > accessing
> > > the i2c bus but I can't include bitbang_i2c.h from the public header 
> > > because
> > > it's a local header. So I needed a local extension to the state struct. 
> > > Once
> > > I have that then it's a good place to also store private registers which 
> > > are
> > > now defined in the same file so I don't have to look up them in a 
> > > different
> > > place. This seemed clearer to me and easier to work with. Maybe the 
> > > spliting
> > > of the rewrite did not make this clear.
> > 
> > Oh.. right.  There's a better way.
> > 
> > You can just forward declare the bitbang_i2c_interface structure like
> > this in your header:
> > typedef struct bitbang_i2c_interface bitbang_i2c_interface;
> > 
> > So you're declaring the existence of the structure, but not its
> > contents - that's sufficient to create a pointer to it.  Then you
> > don't need to create the substructure and extra level of indirection.
> > 
> > > One thing I'm not sure about though:
> > > 
> > > > > ---
> > > > >  hw/i2c/ppc4xx_i2c.c | 75 
> > > > > +
> > > > >  include/hw/i2c/ppc4xx_i2c.h | 19 ++--
> > > > >  2 files changed, 43 insertions(+), 51 deletions(-)
> > > > > 
> > > > > diff --git a/hw/i2c/ppc4xx_i2c.c b/hw/i2c/ppc4xx_i2c.c
> > > > > index d1936db..a68b5f7 100644
> > > > > --- a/hw/i2c/ppc4xx_i2c.c
> > > > > +++ b/hw/i2c/ppc4xx_i2c.c
> > > [...]
> > > > > @@ -330,7 +335,9 @@ static const MemoryRegionOps ppc4xx_i2c_ops = {
> > > > >  static void ppc4xx_i2c_init(Object *o)
> > > > >  {
> > > > >  PPC4xxI2CState *s = PPC4xx_I2C(o);
> > > > > +PPC4xxI2CRegs *r = g_malloc0(sizeof(PPC4xxI2CRegs));
> > > > > 
> > > > > +s->regs = r;
> > > > >  memory_region_init_io(&s->iomem, OBJECT(s), &ppc4xx_i2c_ops, s,
> > > > >TYPE_PPC4xx_I2C, PPC4xx_I2C_MEM_SIZE);
> > > > >  sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
> > > 
> > > I allocate memory here but I'm not sure if it should be g_free'd somewhere
> > > and if so where? I was not able to detangle QOM object hierarchies and 
> > > there
> > > seems to be no good docs available or I haven't found them. (PCI devices
> > > seem to have unrealize methods but this did not work for I2C objects.)
> > 
> > Yes, if you're allocating you definitely should be free()ing.  It
> > should go in the corresponding cleanup routine to where it is
> > allocated.  Since the allocation is in instance_init(), the free()
> > should be in instance_finalize() (which you'd need to add).
> > 
> > Except that the above should let you avoid that.
> > 
> > ..and I guess this won't actually ever be finalized in practice.
> > 
> > ..and there doesn't seem to be a way to free up a bitbang_interface,
> > so even if you added the finalize, it still wouldn't really clean up
> > properly.
> 
> Yes, I suspected it won't matter anyway. I'll try your suggestion to just
> declare the bitbang_i2c_interface in the public header in next version.
> 
> Any more reviews to expect from you for other patches or should I send a v3
> with the changes so far?

Go ahead with v3.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 1/2] hw/mips/jazz: create ESP device directly via qdev

2018-06-13 Thread Paolo Bonzini
On 13/06/2018 11:47, Mark Cave-Ayland wrote:
> +dev = qdev_create(NULL, TYPE_ESP);
> +sysbus_esp = ESP_STATE(dev);
> +esp = &sysbus_esp->esp;
> +esp->dma_memory_read = rc4030_dma_read;
> +esp->dma_memory_write = rc4030_dma_write;
> +esp->dma_opaque = dmas[0];

Poking at the functions here is a bit ugly, and it's the last user of
rc4030_dma_{read,write}.  It would be nicer if ESP could get a memory
region like it's done a bit above for the NIC.  I guess it's not a big
deal, but perhaps there could be a TODO comment.

I'm mostly mentioning this because Hervé is copied and because SPARC DMA
has the same issue of using function pointers instead of an IOMMU memory
region...

Thanks,

Paolo

> +sysbus_esp->it_shift = 0;
> +/* XXX for now until rc4030 has been changed to use DMA enable signal */
> +esp->dma_enabled = 1;
> +qdev_init_nofail(dev);
> +
> +sysbus = SYS_BUS_DEVICE(dev);
> +sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(rc4030, 5));
> +sysbus_mmio_map(sysbus, 0, 0x80002000);
> +




Re: [Qemu-devel] [PATCH v1 03/11] pc: factor out pc-dimm checks into pc_dimm_pre_plug()

2018-06-13 Thread Igor Mammedov
On Mon, 11 Jun 2018 14:16:47 +0200
David Hildenbrand  wrote:

> We can perform these checks before the device is actually realized.
> 
> Signed-off-by: David Hildenbrand 

Reviewed-by: Igor Mammedov 

> ---
>  hw/i386/pc.c | 44 ++--
>  1 file changed, 26 insertions(+), 18 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index f3befe6721..85c040482e 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1674,6 +1674,29 @@ void ioapic_init_gsi(GSIState *gsi_state, const char 
> *parent_name)
>  }
>  }
>  
> +static void pc_dimm_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> + Error **errp)
> +{
> +const PCMachineState *pcms = PC_MACHINE(hotplug_dev);
> +const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
> +
> +/*
> + * When -no-acpi is used with Q35 machine type, no ACPI is built,
> + * but pcms->acpi_dev is still created. Check !acpi_enabled in
> + * addition to cover this case.
> + */
> +if (!pcms->acpi_dev || !acpi_enabled) {
> +error_setg(errp,
> +   "memory hotplug is not enabled: missing acpi device or 
> acpi disabled");
> +return;
> +}
> +
> +if (is_nvdimm && !pcms->acpi_nvdimm_state.is_enabled) {
> +error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'");
> +return;
> +}
> +}
> +
>  static void pc_dimm_plug(HotplugHandler *hotplug_dev,
>   DeviceState *dev, Error **errp)
>  {
> @@ -1696,23 +1719,6 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
>  align = memory_region_get_alignment(mr);
>  }
>  
> -/*
> - * When -no-acpi is used with Q35 machine type, no ACPI is built,
> - * but pcms->acpi_dev is still created. Check !acpi_enabled in
> - * addition to cover this case.
> - */
> -if (!pcms->acpi_dev || !acpi_enabled) {
> -error_setg(&local_err,
> -   "memory hotplug is not enabled: missing acpi device or 
> acpi disabled");
> -goto out;
> -}
> -
> -if (is_nvdimm && !pcms->acpi_nvdimm_state.is_enabled) {
> -error_setg(&local_err,
> -   "nvdimm is not enabled: missing 'nvdimm' in '-M'");
> -goto out;
> -}
> -
>  pc_dimm_memory_plug(dev, MACHINE(pcms), align, &local_err);
>  if (local_err) {
>  goto out;
> @@ -2006,7 +2012,9 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
>  static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
>DeviceState *dev, Error **errp)
>  {
> -if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> +if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> +pc_dimm_pre_plug(hotplug_dev, dev, errp);
> +} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
>  pc_cpu_pre_plug(hotplug_dev, dev, errp);
>  }
>  }




Re: [Qemu-devel] [PATCH 6/7] target/ppc: Replace intc pointer with a general machine_data pointer

2018-06-13 Thread Greg Kurz
On Wed, 13 Jun 2018 16:57:06 +1000
David Gibson  wrote:

> PowerPCCPU contains an (Object *)intc used to point to the cpu's interrupt
> controller.  Or more precisely to the "presentation" component of the
> interrupt controller relevant to this cpu.
> 
> Really, this field is machine specific.  The machines which use it can
> point it to different types of object depending on their needs, and most
> machines don't use it at all (since they have older style PICs which don't
> have per-cpu presentation components).
> 
> There's also other information that's per-cpu, but platform/machine
> specific.  So replace the intc pointer with a (void *)machine_data which
> can be managed as the machine type likes to conveniently store per cpu
> information.
> 
> Signed-off-by: David Gibson 
> ---

This looks good, just one question below...

>  hw/intc/xics.c  |  5 +++--
>  hw/intc/xics_spapr.c| 16 +++-
>  hw/ppc/pnv.c|  4 ++--
>  hw/ppc/pnv_core.c   | 11 +--
>  hw/ppc/spapr.c  |  8 
>  hw/ppc/spapr_cpu_core.c | 13 ++---
>  include/hw/ppc/pnv_core.h   |  9 +
>  include/hw/ppc/spapr_cpu_core.h | 10 ++
>  include/hw/ppc/xics.h   |  4 ++--
>  target/ppc/cpu.h|  2 +-
>  10 files changed, 61 insertions(+), 21 deletions(-)
> 
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index e73e623e3b..689ad44e5f 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -383,7 +383,8 @@ static const TypeInfo icp_info = {
>  .class_size = sizeof(ICPStateClass),
>  };
>  
> -Object *icp_create(Object *cpu, const char *type, XICSFabric *xi, Error 
> **errp)
> +ICPState *icp_create(Object *cpu, const char *type, XICSFabric *xi,
> + Error **errp)
>  {
>  Error *local_err = NULL;
>  Object *obj;
> @@ -401,7 +402,7 @@ Object *icp_create(Object *cpu, const char *type, 
> XICSFabric *xi, Error **errp)
>  obj = NULL;
>  }
>  
> -return obj;
> +return ICP(obj);
>  }
>  
>  /*
> diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
> index 2e27b92b87..01c76717cf 100644
> --- a/hw/intc/xics_spapr.c
> +++ b/hw/intc/xics_spapr.c
> @@ -31,6 +31,7 @@
>  #include "trace.h"
>  #include "qemu/timer.h"
>  #include "hw/ppc/spapr.h"
> +#include "hw/ppc/spapr_cpu_core.h"
>  #include "hw/ppc/xics.h"
>  #include "hw/ppc/fdt.h"
>  #include "qapi/visitor.h"
> @@ -43,8 +44,9 @@ static target_ulong h_cppr(PowerPCCPU *cpu, 
> sPAPRMachineState *spapr,
> target_ulong opcode, target_ulong *args)
>  {
>  target_ulong cppr = args[0];
> +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
>  
> -icp_set_cppr(ICP(cpu->intc), cppr);
> +icp_set_cppr(spapr_cpu->icp, cppr);
>  return H_SUCCESS;
>  }
>  
> @@ -65,7 +67,8 @@ static target_ulong h_ipi(PowerPCCPU *cpu, 
> sPAPRMachineState *spapr,
>  static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> target_ulong opcode, target_ulong *args)
>  {
> -uint32_t xirr = icp_accept(ICP(cpu->intc));
> +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
> +uint32_t xirr = icp_accept(spapr_cpu->icp);
>  
>  args[0] = xirr;
>  return H_SUCCESS;
> @@ -74,7 +77,8 @@ static target_ulong h_xirr(PowerPCCPU *cpu, 
> sPAPRMachineState *spapr,
>  static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>   target_ulong opcode, target_ulong *args)
>  {
> -uint32_t xirr = icp_accept(ICP(cpu->intc));
> +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
> +uint32_t xirr = icp_accept(spapr_cpu->icp);
>  
>  args[0] = xirr;
>  args[1] = cpu_get_host_ticks();
> @@ -84,9 +88,10 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, 
> sPAPRMachineState *spapr,
>  static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>target_ulong opcode, target_ulong *args)
>  {
> +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
>  target_ulong xirr = args[0];
>  
> -icp_eoi(ICP(cpu->intc), xirr);
> +icp_eoi(spapr_cpu->icp, xirr);
>  return H_SUCCESS;
>  }
>  
> @@ -94,7 +99,8 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, 
> sPAPRMachineState *spapr,
>  target_ulong opcode, target_ulong *args)
>  {
>  uint32_t mfrr;
> -uint32_t xirr = icp_ipoll(ICP(cpu->intc), &mfrr);
> +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
> +uint32_t xirr = icp_ipoll(spapr_cpu->icp, &mfrr);
>  
>  args[0] = xirr;
>  args[1] = mfrr;
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index 0b9508d94d..3a36c6ac6a 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -1013,7 +1013,7 @@ static ICPState *pnv_icp_get(XICSFabric *xi, int pir)
>  {
>  PowerPCCPU *cpu = ppc_get_vcpu_by_pir(pir);
>  
> -return cpu ? ICP(cpu->intc) : NULL;
> +return cpu ? pnv_cpu_state(cpu)->icp : NULL;
>  }
>  
>  sta

Re: [Qemu-devel] [PATCH v1 04/11] hostmem: drop error variable from host_memory_backend_get_memory()

2018-06-13 Thread Igor Mammedov
On Mon, 11 Jun 2018 14:16:48 +0200
David Hildenbrand  wrote:

> Unused, so let's remove it.
> 
> Signed-off-by: David Hildenbrand 

Reviewed-by: Igor Mammedov 

> ---
>  backends/hostmem.c   | 3 +--
>  hw/mem/nvdimm.c  | 4 ++--
>  hw/mem/pc-dimm.c | 4 ++--
>  hw/misc/ivshmem.c| 3 +--
>  include/sysemu/hostmem.h | 3 +--
>  numa.c   | 3 +--
>  6 files changed, 8 insertions(+), 12 deletions(-)
> 
> diff --git a/backends/hostmem.c b/backends/hostmem.c
> index 3627e61584..4908946cd3 100644
> --- a/backends/hostmem.c
> +++ b/backends/hostmem.c
> @@ -247,8 +247,7 @@ bool host_memory_backend_mr_inited(HostMemoryBackend 
> *backend)
>  return memory_region_size(&backend->mr) != 0;
>  }
>  
> -MemoryRegion *
> -host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
> +MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
>  {
>  return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
>  }
> diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
> index f974accbdd..df9716231f 100644
> --- a/hw/mem/nvdimm.c
> +++ b/hw/mem/nvdimm.c
> @@ -105,7 +105,7 @@ static MemoryRegion 
> *nvdimm_get_memory_region(PCDIMMDevice *dimm, Error **errp)
>  
>  static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp)
>  {
> -MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem, errp);
> +MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem);
>  NVDIMMDevice *nvdimm = NVDIMM(dimm);
>  uint64_t align, pmem_size, size = memory_region_size(mr);
>  
> @@ -161,7 +161,7 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, 
> const void *buf,
>  
>  memcpy(nvdimm->label_data + offset, buf, size);
>  
> -mr = host_memory_backend_get_memory(dimm->hostmem, &error_abort);
> +mr = host_memory_backend_get_memory(dimm->hostmem);
>  backend_offset = memory_region_size(mr) - nvdimm->label_size + offset;
>  memory_region_set_dirty(mr, backend_offset, size);
>  }
> diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
> index 62b34a992e..86fbcf2d0c 100644
> --- a/hw/mem/pc-dimm.c
> +++ b/hw/mem/pc-dimm.c
> @@ -224,12 +224,12 @@ static MemoryRegion 
> *pc_dimm_get_memory_region(PCDIMMDevice *dimm, Error **errp)
>  return NULL;
>  }
>  
> -return host_memory_backend_get_memory(dimm->hostmem, errp);
> +return host_memory_backend_get_memory(dimm->hostmem);
>  }
>  
>  static MemoryRegion *pc_dimm_get_vmstate_memory_region(PCDIMMDevice *dimm)
>  {
> -return host_memory_backend_get_memory(dimm->hostmem, &error_abort);
> +return host_memory_backend_get_memory(dimm->hostmem);
>  }
>  
>  static uint64_t pc_dimm_md_get_addr(const MemoryDeviceState *md)
> diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
> index 16f03701b7..ee01c5e66b 100644
> --- a/hw/misc/ivshmem.c
> +++ b/hw/misc/ivshmem.c
> @@ -909,8 +909,7 @@ static void ivshmem_common_realize(PCIDevice *dev, Error 
> **errp)
>  if (s->hostmem != NULL) {
>  IVSHMEM_DPRINTF("using hostmem\n");
>  
> -s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem,
> - &error_abort);
> +s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem);
>  } else {
>  Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
>  assert(chr);
> diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h
> index 5beb0ef8ab..6e6bd2c1cb 100644
> --- a/include/sysemu/hostmem.h
> +++ b/include/sysemu/hostmem.h
> @@ -62,8 +62,7 @@ struct HostMemoryBackend {
>  };
>  
>  bool host_memory_backend_mr_inited(HostMemoryBackend *backend);
> -MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend,
> - Error **errp);
> +MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend);
>  
>  void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped);
>  bool host_memory_backend_is_mapped(HostMemoryBackend *backend);
> diff --git a/numa.c b/numa.c
> index 33572bfa74..94f758c757 100644
> --- a/numa.c
> +++ b/numa.c
> @@ -523,8 +523,7 @@ void memory_region_allocate_system_memory(MemoryRegion 
> *mr, Object *owner,
>  if (!backend) {
>  continue;
>  }
> -MemoryRegion *seg = host_memory_backend_get_memory(backend,
> -   &error_fatal);
> +MemoryRegion *seg = host_memory_backend_get_memory(backend);
>  
>  if (memory_region_is_mapped(seg)) {
>  char *path = 
> object_get_canonical_path_component(OBJECT(backend));




Re: [Qemu-devel] [PATCH 6/7] target/ppc: Replace intc pointer with a general machine_data pointer

2018-06-13 Thread David Gibson
On Wed, Jun 13, 2018 at 12:11:12PM +0200, Greg Kurz wrote:
> On Wed, 13 Jun 2018 16:57:06 +1000
> David Gibson  wrote:
> 
> > PowerPCCPU contains an (Object *)intc used to point to the cpu's interrupt
> > controller.  Or more precisely to the "presentation" component of the
> > interrupt controller relevant to this cpu.
> > 
> > Really, this field is machine specific.  The machines which use it can
> > point it to different types of object depending on their needs, and most
> > machines don't use it at all (since they have older style PICs which don't
> > have per-cpu presentation components).
> > 
> > There's also other information that's per-cpu, but platform/machine
> > specific.  So replace the intc pointer with a (void *)machine_data which
> > can be managed as the machine type likes to conveniently store per cpu
> > information.
> > 
> > Signed-off-by: David Gibson 
> > ---
> 
> This looks good, just one question below...
> 
> >  hw/intc/xics.c  |  5 +++--
> >  hw/intc/xics_spapr.c| 16 +++-
> >  hw/ppc/pnv.c|  4 ++--
> >  hw/ppc/pnv_core.c   | 11 +--
> >  hw/ppc/spapr.c  |  8 
> >  hw/ppc/spapr_cpu_core.c | 13 ++---
> >  include/hw/ppc/pnv_core.h   |  9 +
> >  include/hw/ppc/spapr_cpu_core.h | 10 ++
> >  include/hw/ppc/xics.h   |  4 ++--
> >  target/ppc/cpu.h|  2 +-
> >  10 files changed, 61 insertions(+), 21 deletions(-)
> > 
> > diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> > index e73e623e3b..689ad44e5f 100644
> > --- a/hw/intc/xics.c
> > +++ b/hw/intc/xics.c
> > @@ -383,7 +383,8 @@ static const TypeInfo icp_info = {
> >  .class_size = sizeof(ICPStateClass),
> >  };
> >  
> > -Object *icp_create(Object *cpu, const char *type, XICSFabric *xi, Error 
> > **errp)
> > +ICPState *icp_create(Object *cpu, const char *type, XICSFabric *xi,
> > + Error **errp)
> >  {
> >  Error *local_err = NULL;
> >  Object *obj;
> > @@ -401,7 +402,7 @@ Object *icp_create(Object *cpu, const char *type, 
> > XICSFabric *xi, Error **errp)
> >  obj = NULL;
> >  }
> >  
> > -return obj;
> > +return ICP(obj);
> >  }
> >  
> >  /*
> > diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
> > index 2e27b92b87..01c76717cf 100644
> > --- a/hw/intc/xics_spapr.c
> > +++ b/hw/intc/xics_spapr.c
> > @@ -31,6 +31,7 @@
> >  #include "trace.h"
> >  #include "qemu/timer.h"
> >  #include "hw/ppc/spapr.h"
> > +#include "hw/ppc/spapr_cpu_core.h"
> >  #include "hw/ppc/xics.h"
> >  #include "hw/ppc/fdt.h"
> >  #include "qapi/visitor.h"
> > @@ -43,8 +44,9 @@ static target_ulong h_cppr(PowerPCCPU *cpu, 
> > sPAPRMachineState *spapr,
> > target_ulong opcode, target_ulong *args)
> >  {
> >  target_ulong cppr = args[0];
> > +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
> >  
> > -icp_set_cppr(ICP(cpu->intc), cppr);
> > +icp_set_cppr(spapr_cpu->icp, cppr);
> >  return H_SUCCESS;
> >  }
> >  
> > @@ -65,7 +67,8 @@ static target_ulong h_ipi(PowerPCCPU *cpu, 
> > sPAPRMachineState *spapr,
> >  static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> > target_ulong opcode, target_ulong *args)
> >  {
> > -uint32_t xirr = icp_accept(ICP(cpu->intc));
> > +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
> > +uint32_t xirr = icp_accept(spapr_cpu->icp);
> >  
> >  args[0] = xirr;
> >  return H_SUCCESS;
> > @@ -74,7 +77,8 @@ static target_ulong h_xirr(PowerPCCPU *cpu, 
> > sPAPRMachineState *spapr,
> >  static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> >   target_ulong opcode, target_ulong *args)
> >  {
> > -uint32_t xirr = icp_accept(ICP(cpu->intc));
> > +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
> > +uint32_t xirr = icp_accept(spapr_cpu->icp);
> >  
> >  args[0] = xirr;
> >  args[1] = cpu_get_host_ticks();
> > @@ -84,9 +88,10 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, 
> > sPAPRMachineState *spapr,
> >  static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> >target_ulong opcode, target_ulong *args)
> >  {
> > +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
> >  target_ulong xirr = args[0];
> >  
> > -icp_eoi(ICP(cpu->intc), xirr);
> > +icp_eoi(spapr_cpu->icp, xirr);
> >  return H_SUCCESS;
> >  }
> >  
> > @@ -94,7 +99,8 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, 
> > sPAPRMachineState *spapr,
> >  target_ulong opcode, target_ulong *args)
> >  {
> >  uint32_t mfrr;
> > -uint32_t xirr = icp_ipoll(ICP(cpu->intc), &mfrr);
> > +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
> > +uint32_t xirr = icp_ipoll(spapr_cpu->icp, &mfrr);
> >  
> >  args[0] = xirr;
> >  args[1] = mfrr;
> > diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> > index

Re: [Qemu-devel] [PATCH 7/7] target/ppc, spapr: Move VPA information to machine_data

2018-06-13 Thread Greg Kurz
On Wed, 13 Jun 2018 16:57:07 +1000
David Gibson  wrote:

> CPUPPCState currently contains a number of fields containing the state of
> the VPA.  The VPA is a PAPR specific concept covering several guest/host
> shared memory areas used to communicate some information with the
> hypervisor.
> 
> As a PAPR concept this is really machine specific information, although it
> is per-cpu, so it doesn't really belong in the core CPU state structure.
> So, move it to the PAPR specific 'machine_data' structure.
> 
> Signed-off-by: David Gibson 
> ---

Nice! I'll rework VPA migration on top of that.

Reviewed-by: Greg Kurz 

>  hw/ppc/spapr_cpu_core.c |  7 +++
>  hw/ppc/spapr_hcall.c| 77 ++---
>  include/hw/ppc/spapr_cpu_core.h |  3 ++
>  target/ppc/cpu.h|  6 ---
>  target/ppc/translate_init.inc.c |  8 
>  5 files changed, 52 insertions(+), 49 deletions(-)
> 
> diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> index 544bda93e2..f642c95967 100644
> --- a/hw/ppc/spapr_cpu_core.c
> +++ b/hw/ppc/spapr_cpu_core.c
> @@ -28,6 +28,7 @@ static void spapr_cpu_reset(void *opaque)
>  CPUState *cs = CPU(cpu);
>  CPUPPCState *env = &cpu->env;
>  PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
>  target_ulong lpcr;
>  
>  cpu_reset(cs);
> @@ -69,6 +70,12 @@ static void spapr_cpu_reset(void *opaque)
>  
>  /* Set a full AMOR so guest can use the AMR as it sees fit */
>  env->spr[SPR_AMOR] = 0xffffffffffffffffull;
> +
> +spapr_cpu->vpa_addr = 0;
> +spapr_cpu->slb_shadow_addr = 0;
> +spapr_cpu->slb_shadow_size = 0;
> +spapr_cpu->dtl_addr = 0;
> +spapr_cpu->dtl_size = 0;
>  }
>  
>  void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip, 
> target_ulong r3)
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 8b9a4b577f..ae913d070f 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -8,6 +8,7 @@
>  #include "exec/exec-all.h"
>  #include "helper_regs.h"
>  #include "hw/ppc/spapr.h"
> +#include "hw/ppc/spapr_cpu_core.h"
>  #include "mmu-hash64.h"
>  #include "cpu-models.h"
>  #include "trace.h"
> @@ -908,9 +909,11 @@ unmap_out:
>  #define VPA_SHARED_PROC_OFFSET 0x9
>  #define VPA_SHARED_PROC_VAL0x2
>  
> -static target_ulong register_vpa(CPUPPCState *env, target_ulong vpa)
> +static target_ulong register_vpa(PowerPCCPU *cpu, target_ulong vpa)
>  {
> -CPUState *cs = CPU(ppc_env_get_cpu(env));
> +CPUState *cs = CPU(cpu);
> +CPUPPCState *env = &cpu->env;
> +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
>  uint16_t size;
>  uint8_t tmp;
>  
> @@ -935,32 +938,34 @@ static target_ulong register_vpa(CPUPPCState *env, 
> target_ulong vpa)
>  return H_PARAMETER;
>  }
>  
> -env->vpa_addr = vpa;
> +spapr_cpu->vpa_addr = vpa;
>  
> -tmp = ldub_phys(cs->as, env->vpa_addr + VPA_SHARED_PROC_OFFSET);
> +tmp = ldub_phys(cs->as, spapr_cpu->vpa_addr + VPA_SHARED_PROC_OFFSET);
>  tmp |= VPA_SHARED_PROC_VAL;
> -stb_phys(cs->as, env->vpa_addr + VPA_SHARED_PROC_OFFSET, tmp);
> +stb_phys(cs->as, spapr_cpu->vpa_addr + VPA_SHARED_PROC_OFFSET, tmp);
>  
>  return H_SUCCESS;
>  }
>  
> -static target_ulong deregister_vpa(CPUPPCState *env, target_ulong vpa)
> +static target_ulong deregister_vpa(PowerPCCPU *cpu, target_ulong vpa)
>  {
> -if (env->slb_shadow_addr) {
> +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
> +
> +if (spapr_cpu->slb_shadow_addr) {
>  return H_RESOURCE;
>  }
>  
> -if (env->dtl_addr) {
> +if (spapr_cpu->dtl_addr) {
>  return H_RESOURCE;
>  }
>  
> -env->vpa_addr = 0;
> +spapr_cpu->vpa_addr = 0;
>  return H_SUCCESS;
>  }
>  
> -static target_ulong register_slb_shadow(CPUPPCState *env, target_ulong addr)
> +static target_ulong register_slb_shadow(PowerPCCPU *cpu, target_ulong addr)
>  {
> -CPUState *cs = CPU(ppc_env_get_cpu(env));
> +sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
>  uint32_t size;
>  
>  if (addr == 0) {
> @@ -968,7 +973,7 @@ static target_ulong register_slb_shadow(CPUPPCState *env, 
> target_ulong addr)
>  return H_HARDWARE;
>  }
>  
> -size = ldl_be_phys(cs->as, addr + 0x4);
> +size = ldl_be_phys(CPU(cpu)->as, addr + 0x4);
>  if (size < 0x8) {
>  return H_PARAMETER;
>  }
> @@ -977,26 +982,28 @@ static target_ulong register_slb_shadow(CPUPPCState 
> *env, target_ulong addr)
>  return H_PARAMETER;
>  }
>  
> -if (!env->vpa_addr) {
> +if (!spapr_cpu->vpa_addr) {
>  return H_RESOURCE;
>  }
>  
> -env->slb_shadow_addr = addr;
> -env->slb_shadow_size = size;
> +spapr_cpu->slb_shadow_addr = addr;
> +spapr_cpu->slb_shadow_size = size;
>  
>  return H_SUCCESS;
>  }
>  
> -static target_ulong deregister_slb_shadow(CPUPPCState *env, target_ulong 
> addr)
> +static target_ulon

  1   2   3   4   5   >