[Qemu-devel] [PATCH for-2.13] Add host_memory_backend_pagesize() helper

2018-03-28 Thread David Gibson
There are a couple of places (one generic, one target-specific) where we need
to get the host page size associated with a particular memory backend.  I
have some upcoming code which will add another place that wants this.  So,
for convenience, add a helper function to calculate this.

host_memory_backend_pagesize() returns the host pagesize for a given
HostMemoryBackend object, or for the default backend (-mem-path) if passed
NULL.

Signed-off-by: David Gibson 
---
 backends/hostmem.c   | 20 
 exec.c   | 21 +
 include/sysemu/hostmem.h |  2 ++
 target/ppc/kvm.c | 10 +-
 4 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/backends/hostmem.c b/backends/hostmem.c
index f61093654e..b6a60cfc5d 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -18,6 +18,7 @@
 #include "qapi/visitor.h"
 #include "qemu/config-file.h"
 #include "qom/object_interfaces.h"
+#include "qemu/mmap-alloc.h"
 
 #ifdef CONFIG_NUMA
 #include 
@@ -262,6 +263,25 @@ bool host_memory_backend_is_mapped(HostMemoryBackend 
*backend)
 return backend->is_mapped;
 }
 
+long host_memory_backend_pagesize(HostMemoryBackend *memdev)
+{
+const char *path = NULL;
+
+#ifdef __linux__
+if (memdev) {
+path = object_property_get_str(OBJECT(memdev), "mem-path", NULL);
+} else {
+path = mem_path;
+}
+#endif
+
+if (path) {
+return qemu_mempath_getpagesize(path);
+} else {
+return getpagesize();
+}
+}
+
 static void
 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 {
diff --git a/exec.c b/exec.c
index c09bd93df3..04856c2402 100644
--- a/exec.c
+++ b/exec.c
@@ -1488,18 +1488,13 @@ void ram_block_dump(Monitor *mon)
  */
 static int find_max_supported_pagesize(Object *obj, void *opaque)
 {
-char *mem_path;
 long *hpsize_min = opaque;
 
 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
-mem_path = object_property_get_str(obj, "mem-path", NULL);
-if (mem_path) {
-long hpsize = qemu_mempath_getpagesize(mem_path);
-if (hpsize < *hpsize_min) {
-*hpsize_min = hpsize;
-}
-} else {
-*hpsize_min = getpagesize();
+long hpsize = host_memory_backend_pagesize(MEMORY_BACKEND(obj));
+
+if (hpsize < *hpsize_min) {
+*hpsize_min = hpsize;
 }
 }
 
@@ -1509,15 +1504,9 @@ static int find_max_supported_pagesize(Object *obj, void 
*opaque)
 long qemu_getrampagesize(void)
 {
 long hpsize = LONG_MAX;
-long mainrampagesize;
+long mainrampagesize = host_memory_backend_pagesize(NULL);
 Object *memdev_root;
 
-if (mem_path) {
-mainrampagesize = qemu_mempath_getpagesize(mem_path);
-} else {
-mainrampagesize = getpagesize();
-}
-
 /* it's possible we have memory-backend objects with
  * hugepage-backed RAM. these may get mapped into system
  * address space via -numa parameters or memory hotplug
diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h
index 47bc9846ac..f474ef97f6 100644
--- a/include/sysemu/hostmem.h
+++ b/include/sysemu/hostmem.h
@@ -68,4 +68,6 @@ MemoryRegion 
*host_memory_backend_get_memory(HostMemoryBackend *backend,
 
 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped);
 bool host_memory_backend_is_mapped(HostMemoryBackend *backend);
+long host_memory_backend_pagesize(HostMemoryBackend *memdev);
+
 #endif
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index b329cd8173..0adcf18c9f 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -493,15 +493,7 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 {
 Object *mem_obj = object_resolve_path(obj_path, NULL);
-char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
-long pagesize;
-
-if (mempath) {
-pagesize = qemu_mempath_getpagesize(mempath);
-g_free(mempath);
-} else {
-pagesize = getpagesize();
-}
+long pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(mem_obj));
 
 return pagesize >= max_cpu_page_size;
 }
-- 
2.14.3




Re: [Qemu-devel] [RFC for-2.13 09/12] target/ppc: Move 1T segment and AMR options to PPCHash64Options

2018-03-28 Thread David Gibson
On Wed, Mar 28, 2018 at 09:40:13AM +0200, Cédric Le Goater wrote:
> On 03/27/2018 06:37 AM, David Gibson wrote:
> > Currently env->mmu_model is a bit of an unholy mess of an enum of distinct
> > MMU types, with various flag bits as well.  This makes which bits of the
> > field should be compared pretty confusing.
> > 
> > Make a start on cleaning that up by moving two of the flags bits -
> > POWERPC_MMU_1TSEG and POWERPC_MMU_AMR - which are specific to the 64-bit
> > hash MMU into a new flags field in PPCHash64Options structure.
> > 
> > Signed-off-by: David Gibson 
> 
> Reviewed-by: Cédric Le Goater 
> 
> Maybe introduce a small helper :
> 
>   #define ppc_hash64_has(cpu, opt) ((cpu)->hash64_opts->flags &
>   (opt))

Good idea, that makes things rather nicer.  I'll include it in the
next spin.


> 
> Thanks,
> 
> C. 
> 
> > ---
> >  hw/ppc/pnv.c|  3 ++-
> >  hw/ppc/spapr.c  |  2 +-
> >  target/ppc/cpu-qom.h| 11 +++
> >  target/ppc/kvm.c|  4 ++--
> >  target/ppc/mmu-hash64.c |  6 --
> >  target/ppc/mmu-hash64.h |  3 +++
> >  6 files changed, 15 insertions(+), 14 deletions(-)
> > 
> > diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> > index 5a79b24828..0aa878b771 100644
> > --- a/hw/ppc/pnv.c
> > +++ b/hw/ppc/pnv.c
> > @@ -36,6 +36,7 @@
> >  #include "monitor/monitor.h"
> >  #include "hw/intc/intc.h"
> >  #include "hw/ipmi/ipmi.h"
> > +#include "target/ppc/mmu-hash64.h"
> >  
> >  #include "hw/ppc/xics.h"
> >  #include "hw/ppc/pnv_xscom.h"
> > @@ -187,7 +188,7 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, 
> > void *fdt)
> >  _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
> >  }
> >  
> > -if (env->mmu_model & POWERPC_MMU_1TSEG) {
> > +if (cpu->hash64_opts->flags & PPC_HASH64_1TSEG) {
> >  _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
> > segs, sizeof(segs;
> >  }
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index a35bffd524..436ed39f7f 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -557,7 +557,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void 
> > *fdt, int offset,
> >  _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
> >  }
> >  
> > -if (env->mmu_model & POWERPC_MMU_1TSEG) {
> > +if (cpu->hash64_opts->flags & PPC_HASH64_1TSEG) {
> >  _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
> >segs, sizeof(segs;
> >  }
> > diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h
> > index 3e5ef7375f..2bd58b2a84 100644
> > --- a/target/ppc/cpu-qom.h
> > +++ b/target/ppc/cpu-qom.h
> > @@ -68,22 +68,17 @@ enum powerpc_mmu_t {
> >  /* PowerPC 601 MMU model (specific BATs format)*/
> >  POWERPC_MMU_601= 0x000A,
> >  #define POWERPC_MMU_64   0x0001
> > -#define POWERPC_MMU_1TSEG0x0002
> > -#define POWERPC_MMU_AMR  0x0004
> >  #define POWERPC_MMU_V3   0x0010 /* ISA V3.00 MMU Support */
> >  /* 64 bits PowerPC MMU */
> >  POWERPC_MMU_64B= POWERPC_MMU_64 | 0x0001,
> >  /* Architecture 2.03 and later (has LPCR) */
> >  POWERPC_MMU_2_03   = POWERPC_MMU_64 | 0x0002,
> >  /* Architecture 2.06 variant   */
> > -POWERPC_MMU_2_06   = POWERPC_MMU_64 | POWERPC_MMU_1TSEG
> > - | POWERPC_MMU_AMR | 0x0003,
> > +POWERPC_MMU_2_06   = POWERPC_MMU_64 | 0x0003,
> >  /* Architecture 2.07 variant   */
> > -POWERPC_MMU_2_07   = POWERPC_MMU_64 | POWERPC_MMU_1TSEG
> > - | POWERPC_MMU_AMR | 0x0004,
> > +POWERPC_MMU_2_07   = POWERPC_MMU_64 | 0x0004,
> >  /* Architecture 3.00 variant   */
> > -POWERPC_MMU_3_00   = POWERPC_MMU_64 | POWERPC_MMU_1TSEG
> > - | POWERPC_MMU_AMR | POWERPC_MMU_V3
> > +POWERPC_MMU_3_00   = POWERPC_MMU_64 | POWERPC_MMU_V3
> >   | 0x0005,
> >  };
> >  #define POWERPC_MMU_VER(x) ((x) & (POWERPC_MMU_64 | 0x))
> > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> > index 01947169c9..3424917381 100644
> > --- a/target/ppc/kvm.c
> > +++ b/target/ppc/kvm.c
> > @@ -302,7 +302,7 @@ static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
> >  /* HV KVM has backing store size restrictions */
> >  info->flags = KVM_PPC_PAGE_SIZES_REAL;
> >  
> > -if (env->mmu_model & POWERPC_MMU_1TSEG) {
> > +if (cpu->hash64_opts->flags & PPC_HASH64_1TSEG) {
> >  info->flags |= KVM_PPC_1T_SEGMENTS;
> >  }
> >  
> > @@ -482,7 +482,7 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
> >  }
> >  env->slb_nr = smmu_info.slb_size;
> >  if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {

Re: [Qemu-devel] [RFC for-2.13 07/12] target/ppc: Split page size information into a separate allocation

2018-03-28 Thread David Gibson
On Wed, Mar 28, 2018 at 09:28:41AM +0200, Cédric Le Goater wrote:
> On 03/27/2018 06:37 AM, David Gibson wrote:
> > env->sps contains page size encoding information as an embedded structure.
> > Since this information is specific to 64-bit hash MMUs, split it out into
> > a separately allocated structure, to reduce the basic env size for other
> > cpus.  Along the way we make a few other cleanups:
> > 
> > * Rename to PPCHash64Options which is more in line with qemu name
> >   conventions, and reflects that we're going to merge some more hash64
> >   mmu specific details in there in future
> > 
> > * Move structure definitions to the mmu-hash64.[ch] files.
> > 
> > Signed-off-by: David Gibson 
> 
> 
> Reviewed-by: Cédric Le Goater 
> 
> While you are at changing things, maybe you could CamelCase 
> 
>   struct ppc_one_seg_page_size
>   struct ppc_one_page_size

Good idea, I'll include that in the next spin.

> 
> No big deal.
> 
> Thanks,
> 
> C.
> 
> > ---
> >  hw/ppc/fdt.c|  4 ++--
> >  target/ppc/cpu-qom.h|  4 ++--
> >  target/ppc/cpu.h| 22 +
> >  target/ppc/kvm.c|  4 ++--
> >  target/ppc/mmu-hash64.c | 47 
> > -
> >  target/ppc/mmu-hash64.h | 21 
> >  target/ppc/translate_init.c | 36 +++---
> >  7 files changed, 69 insertions(+), 69 deletions(-)
> > 
> > diff --git a/hw/ppc/fdt.c b/hw/ppc/fdt.c
> > index 2721603ffa..c4ba16f6b4 100644
> > --- a/hw/ppc/fdt.c
> > +++ b/hw/ppc/fdt.c
> > @@ -9,6 +9,7 @@
> >  
> >  #include "qemu/osdep.h"
> >  #include "target/ppc/cpu.h"
> > +#include "target/ppc/mmu-hash64.h"
> >  
> >  #include "hw/ppc/fdt.h"
> >  
> > @@ -16,13 +17,12 @@
> >  size_t ppc_create_page_sizes_prop(PowerPCCPU *cpu, uint32_t *prop,
> >size_t maxsize)
> >  {
> > -CPUPPCState *env = &cpu->env;
> >  size_t maxcells = maxsize / sizeof(uint32_t);
> >  int i, j, count;
> >  uint32_t *p = prop;
> >  
> >  for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
> > -struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
> > +struct ppc_one_seg_page_size *sps = &cpu->hash64_opts->sps[i];
> >  
> >  if (!sps->page_shift) {
> >  break;
> > diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h
> > index 9bbb05cf62..3e5ef7375f 100644
> > --- a/target/ppc/cpu-qom.h
> > +++ b/target/ppc/cpu-qom.h
> > @@ -160,7 +160,7 @@ enum powerpc_input_t {
> >  PPC_FLAGS_INPUT_RCPU,
> >  };
> >  
> > -struct ppc_segment_page_sizes;
> > +typedef struct PPCHash64Options PPCHash64Options;
> >  
> >  /**
> >   * PowerPCCPUClass:
> > @@ -194,7 +194,7 @@ typedef struct PowerPCCPUClass {
> >  uint32_t flags;
> >  int bfd_mach;
> >  uint32_t l1_dcache_size, l1_icache_size;
> > -const struct ppc_segment_page_sizes *sps;
> > +const PPCHash64Options *hash64_opts;
> >  struct ppc_radix_page_info *radix_page_info;
> >  void (*init_proc)(CPUPPCState *env);
> >  int  (*check_pow)(CPUPPCState *env);
> > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> > index c621a6bd5e..fb6c578eb5 100644
> > --- a/target/ppc/cpu.h
> > +++ b/target/ppc/cpu.h
> > @@ -948,28 +948,8 @@ enum {
> >  
> >  #define DBELL_PROCIDTAG_MASK   PPC_BITMASK(44, 63)
> >  
> > -/*/
> > -/* Segment page size information, used by recent hash MMUs
> > - * The format of this structure mirrors kvm_ppc_smmu_info
> > - */
> > -
> >  #define PPC_PAGE_SIZES_MAX_SZ   8
> >  
> > -struct ppc_one_page_size {
> > -uint32_t page_shift;  /* Page shift (or 0) */
> > -uint32_t pte_enc; /* Encoding in the HPTE (>>12) */
> > -};
> > -
> > -struct ppc_one_seg_page_size {
> > -uint32_t page_shift;  /* Base page shift of segment (or 0) */
> > -uint32_t slb_enc; /* SLB encoding for BookS */
> > -struct ppc_one_page_size enc[PPC_PAGE_SIZES_MAX_SZ];
> > -};
> > -
> > -struct ppc_segment_page_sizes {
> > -struct ppc_one_seg_page_size sps[PPC_PAGE_SIZES_MAX_SZ];
> > -};
> > -
> >  struct ppc_radix_page_info {
> >  uint32_t count;
> >  uint32_t entries[PPC_PAGE_SIZES_MAX_SZ];
> > @@ -1106,7 +1086,6 @@ struct CPUPPCState {
> >  uint64_t insns_flags;
> >  uint64_t insns_flags2;
> >  #if defined(TARGET_PPC64)
> > -struct ppc_segment_page_sizes sps;
> >  ppc_slb_t vrma_slb;
> >  target_ulong rmls;
> >  bool ci_large_pages;
> > @@ -1227,6 +1206,7 @@ struct PowerPCCPU {
> >  PPCVirtualHypervisor *vhyp;
> >  Object *intc;
> >  int32_t node_id; /* NUMA node this CPU belongs to */
> > +PPCHash64Options *hash64_opts;
> >  
> >  /* Fields related to migration compatibility hacks */
> >  bool pre_2_8_migration;
> > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> > index 6160356a4a..01947169c9 100644
> > 

Re: [Qemu-devel] [RFC for-2.13 11/12] target/ppc: Remove unnecessary POWERPC_MMU_V3 flag from mmu_model

2018-03-28 Thread David Gibson
On Wed, Mar 28, 2018 at 12:19:37PM +0200, Cédric Le Goater wrote:
> On 03/28/2018 10:47 AM, David Gibson wrote:
> > On Wed, Mar 28, 2018 at 09:49:25AM +0200, Cédric Le Goater wrote:
> >> On 03/28/2018 09:43 AM, Cédric Le Goater wrote:
> >>> On 03/27/2018 06:37 AM, David Gibson wrote:
>  The only place we test this flag is in conjunction with
>  ppc64_use_proc_tbl().  That checks for the LPCR_UPRT bit, which we 
>  already
>  ensure can't be set except on a machine with a v3 MMU (i.e. POWER9).
> >>>
> >>> hmm, ok, but what will I use for the PowerNV hash MMU support then ? 
> >>
> >> That will be POWERPC_MMU_3_00.
> > 
> > You could check for that explicitly, or you could just check for
> > presence of non-NULL hash64_opts.  The idea is that will always be the
> > case for cpus capable of using the hash MMU.
> 
> ok. I will rebase when your patchset is merged.
>  
> > I'm also considering adding a similar radix_opts with radix specific
> > details.  
> 
> yes. It looks a bit unbalanced now.

Right.  In theory it would be nice to split out hash32 / BookE /
whatever options into their own substructures as well, but I doubt
anyone will ever care enough to actually do it.

> > POWER9 would have both, since it can support either mode.
> > 
> >> I didn't realize mmu_model was so 
> >> crowded ..
> > 
> > It's not so much that it's short of space.  It's more that the mix of
> > enum-like pieces and bitflag-like pieces makes it confusing to
> > know whether it should be tested with simple equality or with &.  And,
> > if testing with equality, which bits should be masked for a sensible
> > comparison.
> > 
> > Additionally, I'd like to get options that are strictly related to the
> > hash mmu out of the general structures.
> 
> which are ? vrma_slb, rmls ?

Ah.. so.. for now I'm just thinking about MMU options / capabilities
rather than MMU state.  That is, things which are set at
initialization but then don't change.  rmls and vrma_slb don't fit in
that category.  slb_nr does, though - I had a shot at moving it to
hash64_opts, but hit some complications, so I might come back to it
later.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v2 04/10] migration: detect compression and decompression errors

2018-03-28 Thread Peter Xu
On Thu, Mar 29, 2018 at 11:51:03AM +0800, Xiao Guangrong wrote:
> 
> 
> On 03/28/2018 05:59 PM, Peter Xu wrote:
> > On Tue, Mar 27, 2018 at 05:10:37PM +0800, guangrong.x...@gmail.com wrote:
> > 
> > [...]
> > 
> > > -static int compress_threads_load_setup(void)
> > > +static int compress_threads_load_setup(QEMUFile *f)
> > >   {
> > >   int i, thread_count;
> > > @@ -2665,6 +2685,7 @@ static int compress_threads_load_setup(void)
> > >   }
> > >   decomp_param[i].stream.opaque = _param[i];
> > > +decomp_param[i].file = f;
> > 
> > On the source side the error will be set via:
> > 
> >  qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
> > 
> > Maybe we can do similar things using migrate_incoming_get_current() to
> > avoid caching the QEMUFile multiple times?
> > 
> 
> I have considered it; however, it cannot work, as the @file used by the ram
> loader is not the file obtained from migrate_incoming_get_current() in some
> cases.
> 
> For example, in colo_process_incoming_thread(), the file passed to
> qemu_loadvm_state() is an internal buffer and it is not easy to switch it
> to the incoming file.

I see. How about caching it in a global variable?  We have these
already:

thread_count = migrate_decompress_threads();
decompress_threads = g_new0(QemuThread, thread_count);
decomp_param = g_new0(DecompressParam, thread_count);
...

IMHO we can add a new one too; at least that way we don't cache it multiple
times (after all, the decomp_param[i]s are global variables too).

-- 
Peter Xu



Re: [Qemu-devel] [PATCH] tcg: fix 16-byte vector operations detection

2018-03-28 Thread Miroslav Rezanina
On Wed, Mar 28, 2018 at 03:31:52PM +0200, Laurent Vivier wrote:
> configure tries to detect if the compiler
> supports 16-byte vector operations.
> 
> As stated in the comment of the detection
> program, there is a problem with the system
> compiler on GCC on Centos 7.
> 
> This program doesn't actually detect the problem
> with GCC on RHEL7 on PPC64LE (Red Hat 4.8.5-28).
> 
> This patch updates the test to look more like
> it is in QEMU helpers, and now detects the problem.
> 
> The error reported is:
> 
>   CC  ppc64-softmmu/accel/tcg/tcg-runtime-gvec.o
>   ..//accel/tcg/tcg-runtime-gvec.c: In function ‘helper_gvec_shl8i’:
>   ../accel/tcg/tcg-runtime-gvec.c:558:26: internal compiler error: in 
> emit_move_insn, at expr.c:3495
>*(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
> ^
> Fixes: db43267 "tcg: Add generic vector expanders"
> Signed-off-by: Laurent Vivier 
> ---
>  configure | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/configure b/configure
> index 4d0e92c96c..a2301dd0dc 100755
> --- a/configure
> +++ b/configure
> @@ -5054,6 +5054,14 @@ static S2 c2;
>  static S4 c4;
>  static S8 c8;
>  static int i;
> +void helper(void *d, void *a, int shift, int i);
> +void helper(void *d, void *a, int shift, int i)
> +{
> +  *(U1 *)(d + i) = *(U1 *)(a + i) << shift;
> +  *(U2 *)(d + i) = *(U2 *)(a + i) << shift;
> +  *(U4 *)(d + i) = *(U4 *)(a + i) << shift;
> +  *(U8 *)(d + i) = *(U8 *)(a + i) << shift;
> +}
>  int main(void)
>  {
>a1 += b1; a2 += b2; a4 += b4; a8 += b8;
> -- 
> 2.14.3
> 
>

Build works correctly for RHEL 7 with this patch

Reviewed-by: Miroslav Rezanina  



Re: [Qemu-devel] [virtio-dev] Re: [PATCH v2 0/6] Extend vhost-user to support VFIO based accelerators

2018-03-28 Thread Michael S. Tsirkin
On Thu, Mar 29, 2018 at 11:33:29AM +0800, Tiwei Bie wrote:
> On Wed, Mar 28, 2018 at 06:33:01PM +0300, Michael S. Tsirkin wrote:
> > On Wed, Mar 28, 2018 at 08:24:07PM +0800, Tiwei Bie wrote:
> > > > > Update notes
> > > > > 
> > > > > 
> > > > > IOMMU feature bit check is removed in this version, because:
> > > > > 
> > > > > The IOMMU feature is negotiable, when an accelerator is used and
> > > > > it doesn't support virtual IOMMU, its driver just won't provide
> > > > > this feature bit when vhost library querying its features. And if
> > > > > it supports the virtual IOMMU, its driver can provide this feature
> > > > > bit. It's not reasonable to add this limitation in this patch set.
> > > > 
> > > > Fair enough. Still:
> > > > Can hardware on intel platforms actually support IOTLB requests?
> > > > Don't you need to add support for vIOMMU shadowing instead?
> > > > 
> > > 
> > > For the hardware I have, I guess they can't for now.
> > 
> > So VFIO in QEMU has support for vIOMMU shadowing.
> > Can you use that somehow?
> 
> Yeah, I guess we can use it in some way. Actually supporting
> vIOMMU is a quite interesting feature. It would provide
> better security, and for the hardware backend case there
> would be no performance penalty with static mapping after
> the backend got all the mappings. I think it could be done
> as another work. Based on your previous suggestion in this
> thread, I have split the guest notification offload and host
> notification offload (I'll send the new version very soon).
> And I plan to let this patch set just focus on fixing the
> most critical performance issue - the host notification offload.
> With this fix, using hardware backend in vhost-user could get
> a very big performance boost and become much more practicable.
> So maybe we can focus on fixing this critical performance issue
> first. What do you think?

I think correctness and security go first before performance.
vIOMMU goes under security.

> > 
> > Ability to run dpdk within guest seems important.
> 
> I think vIOMMU isn't a must to run DPDK in guest.

Oh yes it is.

> For Linux
> guest we also have igb_uio and uio_pci_generic to run DPDK,
> for FreeBSD guest we have nic_uio.

These hacks offer no protection from a buggy userspace corrupting guest
kernel memory. Given DPDK is routinely linked into closed source
applications, this is not a configuration anyone can support.


> They don't need vIOMMU,
> and they could offer the best performance.
> 
> Best regards,
> Tiwei Bie
> 
> > 
> > -- 
> > MST
> > 
> > -
> > To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
> > For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org
> > 



Re: [Qemu-devel] [PATCH v2 03/10] migration: stop decompression to allocate and free memory frequently

2018-03-28 Thread Peter Xu
On Thu, Mar 29, 2018 at 11:43:07AM +0800, Xiao Guangrong wrote:
> 
> 
> On 03/28/2018 05:42 PM, Peter Xu wrote:
> > On Tue, Mar 27, 2018 at 05:10:36PM +0800, guangrong.x...@gmail.com wrote:
> > 
> > [...]
> > 
> > > +static int compress_threads_load_setup(void)
> > > +{
> > > +int i, thread_count;
> > > +
> > > +if (!migrate_use_compression()) {
> > > +return 0;
> > > +}
> > > +
> > > +thread_count = migrate_decompress_threads();
> > > +decompress_threads = g_new0(QemuThread, thread_count);
> > > +decomp_param = g_new0(DecompressParam, thread_count);
> > > +qemu_mutex_init(&decomp_done_lock);
> > > +qemu_cond_init(&decomp_done_cond);
> > > +for (i = 0; i < thread_count; i++) {
> > > +if (inflateInit(&decomp_param[i].stream) != Z_OK) {
> > > +goto exit;
> > > +}
> > > +decomp_param[i].stream.opaque = &decomp_param[i];
> > 
> > Same question as the encoding patch here, otherwise looks good to me.
> 
> Thanks for pointing this out, will fix.
> 
> Hmm, can I treat it as your Reviewed-by for the next version?

Yes :), as long as we drop the usage of zstream.opaque and use any
existing fields.

And also for the previous patch too, since they are mostly the same.

Thanks,

-- 
Peter Xu



Re: [Qemu-devel] [PATCH v2 04/10] migration: detect compression and decompression errors

2018-03-28 Thread Xiao Guangrong



On 03/28/2018 05:59 PM, Peter Xu wrote:

On Tue, Mar 27, 2018 at 05:10:37PM +0800, guangrong.x...@gmail.com wrote:

[...]


-static int compress_threads_load_setup(void)
+static int compress_threads_load_setup(QEMUFile *f)
  {
  int i, thread_count;
  
@@ -2665,6 +2685,7 @@ static int compress_threads_load_setup(void)

  }
  decomp_param[i].stream.opaque = _param[i];
  
+decomp_param[i].file = f;


On the source side the error will be set via:

 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);

Maybe we can do similar things using migrate_incoming_get_current() to
avoid caching the QEMUFile multiple times?



I have considered it; however, it cannot work, as the @file used by the ram
loader is not the file obtained from migrate_incoming_get_current() in some
cases.

For example, in colo_process_incoming_thread(), the file passed to
qemu_loadvm_state() is an internal buffer and it is not easy to switch it
to the incoming file.




Re: [Qemu-devel] [PATCH v2 03/10] migration: stop decompression to allocate and free memory frequently

2018-03-28 Thread Xiao Guangrong



On 03/28/2018 05:42 PM, Peter Xu wrote:

On Tue, Mar 27, 2018 at 05:10:36PM +0800, guangrong.x...@gmail.com wrote:

[...]


+static int compress_threads_load_setup(void)
+{
+int i, thread_count;
+
+if (!migrate_use_compression()) {
+return 0;
+}
+
+thread_count = migrate_decompress_threads();
+decompress_threads = g_new0(QemuThread, thread_count);
+decomp_param = g_new0(DecompressParam, thread_count);
+qemu_mutex_init(&decomp_done_lock);
+qemu_cond_init(&decomp_done_cond);
+for (i = 0; i < thread_count; i++) {
+if (inflateInit(&decomp_param[i].stream) != Z_OK) {
+goto exit;
+}
+decomp_param[i].stream.opaque = &decomp_param[i];


Same question as the encoding patch here, otherwise looks good to me.


Thanks for pointing this out, will fix.

Hmm, can I treat it as your Reviewed-by for the next version?



Re: [Qemu-devel] [PATCH v2 02/10] migration: stop compression to allocate and free memory frequently

2018-03-28 Thread Xiao Guangrong



On 03/28/2018 05:25 PM, Peter Xu wrote:

On Tue, Mar 27, 2018 at 05:10:35PM +0800, guangrong.x...@gmail.com wrote:

[...]


@@ -357,10 +358,20 @@ static void compress_threads_save_cleanup(void)
  terminate_compression_threads();
  thread_count = migrate_compress_threads();
  for (i = 0; i < thread_count; i++) {
+/*
+ * stream.opaque can be used to store private data, we use it
+ * as a indicator which shows if the thread is properly init'd
+ * or not
+ */
+if (!comp_param[i].stream.opaque) {
+break;
+}


How about using comp_param[i].file?  The opaque seems to be hiding
deeper, and...



Yes, indeed, good suggestion.


  qemu_thread_join(compress_threads + i);
  qemu_fclose(comp_param[i].file);
  qemu_mutex_destroy(_param[i].mutex);
  qemu_cond_destroy(_param[i].cond);
+deflateEnd(_param[i].stream);
+comp_param[i].stream.opaque = NULL;
  }
  qemu_mutex_destroy(_done_lock);
  qemu_cond_destroy(_done_cond);
@@ -370,12 +381,12 @@ static void compress_threads_save_cleanup(void)
  comp_param = NULL;
  }
  
-static void compress_threads_save_setup(void)

+static int compress_threads_save_setup(void)
  {
  int i, thread_count;
  
  if (!migrate_use_compression()) {

-return;
+return 0;
  }
  thread_count = migrate_compress_threads();
  compress_threads = g_new0(QemuThread, thread_count);
@@ -383,6 +394,12 @@ static void compress_threads_save_setup(void)
  qemu_cond_init(_done_cond);
  qemu_mutex_init(_done_lock);
  for (i = 0; i < thread_count; i++) {
+if (deflateInit(_param[i].stream,
+   migrate_compress_level()) != Z_OK) {


(indent issue)



Will fix.


+goto exit;
+}
+comp_param[i].stream.opaque = _param[i];


...here from document:

 ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));

 Initializes the internal stream state for compression. The
 fields zalloc, zfree and opaque must be initialized before by
 the caller. If zalloc and zfree are set to Z_NULL, deflateInit
 updates them to use default allocation functions.

So shall we init opaque first?  Otherwise looks good to me.


No, opaque needs to be initialized only if zalloc and zfree are specified,
which is not the case in this patch.





Re: [Qemu-devel] [virtio-dev] Re: [PATCH v2 0/6] Extend vhost-user to support VFIO based accelerators

2018-03-28 Thread Tiwei Bie
On Wed, Mar 28, 2018 at 06:33:01PM +0300, Michael S. Tsirkin wrote:
> On Wed, Mar 28, 2018 at 08:24:07PM +0800, Tiwei Bie wrote:
> > > > Update notes
> > > > 
> > > > 
> > > > IOMMU feature bit check is removed in this version, because:
> > > > 
> > > > The IOMMU feature is negotiable, when an accelerator is used and
> > > > it doesn't support virtual IOMMU, its driver just won't provide
> > > > this feature bit when vhost library querying its features. And if
> > > > it supports the virtual IOMMU, its driver can provide this feature
> > > > bit. It's not reasonable to add this limitation in this patch set.
> > > 
> > > Fair enough. Still:
> > > Can hardware on intel platforms actually support IOTLB requests?
> > > Don't you need to add support for vIOMMU shadowing instead?
> > > 
> > 
> > For the hardware I have, I guess they can't for now.
> 
> So VFIO in QEMU has support for vIOMMU shadowing.
> Can you use that somehow?

Yeah, I guess we can use it in some way. Actually supporting
vIOMMU is a quite interesting feature. It would provide
better security, and for the hardware backend case there
would be no performance penalty with static mapping after
the backend got all the mappings. I think it could be done
as another work. Based on your previous suggestion in this
thread, I have split the guest notification offload and host
notification offload (I'll send the new version very soon).
And I plan to let this patch set just focus on fixing the
most critical performance issue - the host notification offload.
With this fix, using hardware backend in vhost-user could get
a very big performance boost and become much more practicable.
So maybe we can focus on fixing this critical performance issue
first. What do you think?

> 
> Ability to run dpdk within guest seems important.

I think vIOMMU isn't a must to run DPDK in guest. For Linux
guest we also have igb_uio and uio_pci_generic to run DPDK,
for FreeBSD guest we have nic_uio. They don't need vIOMMU,
and they could offer the best performance.

Best regards,
Tiwei Bie

> 
> -- 
> MST
> 
> -
> To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org
> 



[Qemu-devel] [PATCH qemu] RFC: memory/hmp: Print owners/parents in "info mtree"

2018-03-28 Thread Alexey Kardashevskiy
This adds printing of owners/parents (which are the same, except that
occasionally owner==NULL) for memory regions; a new '-o' flag
enables the new output.

Signed-off-by: Alexey Kardashevskiy 
---

Does this look at all useful?

There are cases ("msi", "msix-table", "msix-pba" and probably more) where
it is not clear what owns an MR, even though they all have an owner (always? mostly?).


"info mtree" example:

address-space: memory
  - (prio 0, i/o): system parent:{obj}
-7fff (prio 0, ram): ppc_spapr.ram parent:{obj}
2000-2000 (prio 0, i/o): alias 
p...@8002000.io-alias @p...@8002000.io 
- owner:{dev path=/machine/unattached/device[3]}
20008000-2000 (prio 0, i/o): alias 
pci@8002000.mmio32-alias @p...@8002000.mmio 
8000- owner:{dev path=/machine/unattached/device[3]}
2100-21ff (prio 0, i/o): alias 
pci@8002000.mmio64-alias @p...@8002000.mmio 
2100-21ff owner:{dev path=/machine/unattached/device[3]}

address-space: I/O
  - (prio 0, i/o): io parent:{obj}

address-space: cpu-memory-0
  - (prio 0, i/o): system parent:{obj}
-7fff (prio 0, ram): ppc_spapr.ram parent:{obj}
2000-2000 (prio 0, i/o): alias 
p...@8002000.io-alias @p...@8002000.io 
- owner:{dev path=/machine/unattached/device[3]}
20008000-2000 (prio 0, i/o): alias 
pci@8002000.mmio32-alias @p...@8002000.mmio 
8000- owner:{dev path=/machine/unattached/device[3]}
2100-21ff (prio 0, i/o): alias 
pci@8002000.mmio64-alias @p...@8002000.mmio 
2100-21ff owner:{dev path=/machine/unattached/device[3]}

address-space: pci@8002000
  - (prio 0, i/o): 
pci@8002000.iommu-root owner:{dev path=/machine/unattached/device[3]}
- (prio 0, i/o): tce-root-8001 
owner:{dev path=/machine/unattached/device[3]/tce-table-8001}
  0800-08007fff (prio 0, i/o): tce-iommu-8001 
owner:{dev path=/machine/unattached/device[3]/tce-table-8001}
- (prio 0, i/o): tce-root-8000 
owner:{dev path=/machine/unattached/device[3]/tce-table-8000}
  -3fff (prio 0, i/o): tce-iommu-8000 
owner:{dev path=/machine/unattached/device[3]/tce-table-8000}
0400-0400 (prio 0, i/o): msi owner:{dev 
path=/machine/unattached/device[3]}

address-space: vfio-pci
  - (prio 0, i/o): bus master container 
owner:{dev id=vfio0001_03_00_0}
- (prio 0, i/o): alias bus master 
@pci@8002000.iommu-root - owner:{dev 
id=vfio0001_03_00_0}

memory-region: p...@8002000.io
  - (prio 0, i/o): p...@8002000.io 
owner:{dev path=/machine/unattached/device[3]}

memory-region: p...@8002000.mmio
  - (prio 0, i/o): p...@8002000.mmio 
owner:{dev path=/machine/unattached/device[3]}
2100-2100 (prio 1, i/o): 0001:03:00.0 base BAR 1 
owner:{dev id=vfio0001_03_00_0}
  2100-2100 (prio 0, i/o): 0001:03:00.0 BAR 1 
owner:{dev id=vfio0001_03_00_0}
  2100e000-2100e5ff (prio 0, i/o): msix-table owner:{dev 
id=vfio0001_03_00_0}
  2100f000-2100f00f (prio 0, i/o): msix-pba [disabled] 
owner:{dev id=vfio0001_03_00_0}
2104-2107 (prio 1, i/o): 0001:03:00.0 base BAR 3 
owner:{dev id=vfio0001_03_00_0}
  2104-2107 (prio 0, i/o): 0001:03:00.0 BAR 3 
owner:{dev id=vfio0001_03_00_0}
2104-2107 (prio 0, ramd): 0001:03:00.0 BAR 3 
mmaps[0] owner:{dev id=vfio0001_03_00_0}

memory-region: pci@8002000.iommu-root
  - (prio 0, i/o): 
pci@8002000.iommu-root owner:{dev path=/machine/unattached/device[3]}
- (prio 0, i/o): tce-root-8001 
owner:{dev path=/machine/unattached/device[3]/tce-table-8001}
  0800-08007fff (prio 0, i/o): tce-iommu-8001 
owner:{dev path=/machine/unattached/device[3]/tce-table-8001}
- (prio 0, i/o): tce-root-8000 
owner:{dev path=/machine/unattached/device[3]/tce-table-8000}
  -3fff (prio 0, i/o): tce-iommu-8000 
owner:{dev 

Re: [Qemu-devel] [PATCH qemu] vfio: Print address space address when cannot map MMIO for DMA

2018-03-28 Thread Alexey Kardashevskiy
On 29/3/18 8:03 am, Auger Eric wrote:
> Hi Alexey, Alex,
> On 22/03/18 09:18, Alexey Kardashevskiy wrote:
>> The 567b5b309abe ("vfio/pci: Relax DMA map errors for MMIO regions") added
>> an error message if a passed memory section address or size is not aligned
>> to the minimal IOMMU page size. However although it checks
>> offset_within_address_space for the alignment, offset_within_region is
>> printed instead which makes it harder to find out what device caused
>> the message so this replaces offset_within_region with
>> offset_within_address_space.
>>
>> While we are here, this replaces '..' with 'size=' (as the second number
>> is a size, not an end offset) and adds a memory region name.
>>
>> Fixes: 567b5b309abe "vfio/pci: Relax DMA map errors for MMIO regions"
>> Signed-off-by: Alexey Kardashevskiy 
> The patch indeed fixes the reported format issues.
> 
> However I have some other concerns with the info that is reported to the
> end-user. See below.
> 
> Assigning an e1000e device with a 64kB host, here are the traces I get:
> 
> Region XXX is not aligned to 0x1 and cannot be mapped for DMA
> 
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0x3fb0
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0xffb0
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0x3fb0
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0xffb0
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0x3fb0
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100a4808 size=0xb7f8
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100e0050 size=0x3fb0
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100e4808 size=0xb7f8
> 
> It took me some time to understand what happens but here is now my
> understanding:
> 
> 1) When looking at vfio_pci_write_config() pdev->io_regions[bar].addr =
> bar_addr in vfio_sub_page_bar_update_mapping() I see the following values:
> 
> UNMAPPED -> 0x0 ->UNMAPPED -> 0x100a -> UNMAPPED -> 0x100a ->
> UNMAPPED -> 0x100e
> 
> vfio_sub_page_bar_update_mapping() create mrs with base bar at
> 0x100a and 0x100e successively, hence the
> vfio_listener_region_add on 0x100a. Indeed, 0x0-0x50 msix-table mmio
> region induces some memory section at 0x100a0050 and 0x100e50 successively.
> 
> However this is confusing for the end-user who only has access to the
> final mapping (0x100e) through lspci [1].


The trace shows that at least at some point the BAR actually was
0x100a, I find this info useful rather than confusing as it might
expose a bug of some sort, for example.

The user also has access to the MR name which is the host PCI address + BAR
index, how is that confusing?


> 2) The changes in the size (0x3fb0 <-> 0xffb0) relate to the extension
> of the 16kB bar to 64kB in vfio_sub_page_bar_update_mapping
> 
> 3) Also it happens that I have a virtio-scsi-pci device that is put just
> after the BAR3 at 0x100a4000 and 0x100e4000 successively. The device has

e1000e gets aligned to 64k but this one avoids the alignment for some reason?


> its own msi-table and pba mmio regions[2]. As mmaps[0] is extended to
> 64kB (with prio 0), we have those MMIO regions which result in new
> memory sections, which cause vfio_listener_region_add calls. This
> typically explains why we get a warning on 0x100e4808 (0xb7f8). By the
> way I don't get why we don't have a trace for "0004:01:00.0 BAR 3
> mmaps[0]" 0x100e4040 size=0x7c0, ie. mmaps[0] space between
> virtio-scsi-pci msix-table and pba.


"info mtree -f" might give a hint how MRs got resolved, could it end up
being emulated (==skipped by the vfio listener)?


> So at the end of the day, my fear is all those info become really
> frightening and confusing for the end-user and even not relevant
> (0x100a stuff). So I would rather simply remove the trace in 2.12
> until we find a place where we could generate a clear hint for the
> end-user, suggesting to relocate the msix bar.
> 
> Thoughts?

Please post complete "lspci -v" output for both pci devices and "info mtree
-f" (in addition to "info mtree", not instead).

In general, the error_report() could be removed as we did not have any
indication of not mapping before so we do not have to start now, I am just
missing the point here - the message exposes potentially not-working P2P
which is useful for people who care about that and do not know if it actually
might work. Rather than silencing it, I'd convert it into a trace point.

Thanks,


> Thanks
> 
> Eric
> 
> 
> [1] lspci
> 
> Region 3: Memory at 104e (32-bit, non-prefetchable) [size=16K]
> Expansion ROM at 1048 [disabled] [size=256K]
> ../..
> Capabilities: [a0] MSI-X: Enable+ Count=5 Masked-
> Vector table: BAR=3 offset=
>   PBA: BAR=3 offset=2000
> 
> [2] info mtree (final)
> 
> 100e-100e (prio 0, i/o): 0004:01:00.0 base BAR 3
>  100e-100e004f (prio 0, i/o): msix-table
>  100e-100e (prio 0, i/o): 0004:01:00.0 BAR 3
>   100e-100e (prio 0, ramd): 0004:01:00.0 BAR 3 mmaps[0]
>   

Re: [Qemu-devel] [PATCH v2 5/6] e1000: Choose which set of props to migrate

2018-03-28 Thread Jason Wang



On 2018年03月29日 00:36, Dr. David Alan Gilbert (git) wrote:

From: "Dr. David Alan Gilbert" 

When we're using the subsection we migrate both
the 'props' and 'tso_props' data; when we're not using
the subsection (to migrate to 2.11 or old machine types) we've
got to choose what to migrate in the main structure.

If we're using the subsection migrate 'props' in the main structure.
If we're not using the subsection then migrate the last one
that changed, which gives behaviour similar to the old behaviour.




But only after migration. Why not simply switch back to the old behavior 
if migrate_tso_props is false?


Thanks




Re: [Qemu-devel] [PATCH v2 2/2] vhost-user: back SET/GET_CONFIG requests with a protocol feature

2018-03-28 Thread Liu, Changpeng


> -Original Message-
> From: Maxime Coquelin [mailto:maxime.coque...@redhat.com]
> Sent: Thursday, March 29, 2018 3:28 AM
> To: m...@redhat.com; Liu, Changpeng ;
> marcandre.lur...@redhat.com; qemu-devel@nongnu.org
> Cc: Maxime Coquelin 
> Subject: [PATCH v2 2/2] vhost-user: back SET/GET_CONFIG requests with a
> protocol feature
> 
> Without a dedicated protocol feature, QEMU cannot know whether
> the backend can handle VHOST_USER_SET_CONFIG and
> VHOST_USER_GET_CONFIG messages.
> 
> This patch adds a protocol feature that is only advertised by
> QEMU if the device implements the config ops. Vhost user init
> fails if the device supports the feature but the backend doesn't.
> 
> The backend should only send VHOST_USER_SLAVE_CONFIG_CHANGE_MSG
> requests if the protocol feature has been negotiated.
> 
> Signed-off-by: Maxime Coquelin 

Passed our own vhost-user-blk target with the patch, I can submit a fix to QEMU 
vhost-user-blk example
after this commit.

Tested-by: Changpeng Liu 
> ---
>  docs/interop/vhost-user.txt | 21 -
>  hw/virtio/vhost-user.c  | 22 ++
>  2 files changed, 34 insertions(+), 9 deletions(-)
> 
> diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
> index c058c407df..534caab18a 100644
> --- a/docs/interop/vhost-user.txt
> +++ b/docs/interop/vhost-user.txt
> @@ -379,6 +379,7 @@ Protocol features
>  #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN   6
>  #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
>  #define VHOST_USER_PROTOCOL_F_PAGEFAULT  8
> +#define VHOST_USER_PROTOCOL_F_CONFIG 9
> 
>  Master message types
>  
> @@ -664,7 +665,8 @@ Master message types
>Master payload: virtio device config space
>Slave payload: virtio device config space
> 
> -  Submitted by the vhost-user master to fetch the contents of the virtio
> +  When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, this message is
> +  submitted by the vhost-user master to fetch the contents of the virtio
>device configuration space, vhost-user slave's payload size MUST match
>master's request, vhost-user slave uses zero length of payload to
>indicate an error to vhost-user master. The vhost-user master may
> @@ -677,7 +679,8 @@ Master message types
>Master payload: virtio device config space
>Slave payload: N/A
> 
> -  Submitted by the vhost-user master when the Guest changes the virtio
> +  When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, this message is
> +  submitted by the vhost-user master when the Guest changes the virtio
>device configuration space and also can be used for live migration
>on the destination host. The vhost-user slave must check the flags
>field, and slaves MUST NOT accept SET_CONFIG for read-only
> @@ -766,13 +769,13 @@ Slave message types
>   Slave payload: N/A
>   Master payload: N/A
> 
> - Vhost-user slave sends such messages to notify that the virtio device's
> - configuration space has changed, for those host devices which can 
> support
> - such feature, host driver can send VHOST_USER_GET_CONFIG message to
> slave
> - to get the latest content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is
> - negotiated, and slave set the VHOST_USER_NEED_REPLY flag, master must
> - respond with zero when operation is successfully completed, or non-zero
> - otherwise.
> + When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, vhost-user slave
> sends
> + such messages to notify that the virtio device's configuration space has
> + changed, for those host devices which can support such feature, host
> + driver can send VHOST_USER_GET_CONFIG message to slave to get the latest
> + content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, and slave
> set
> + the VHOST_USER_NEED_REPLY flag, master must respond with zero when
> + operation is successfully completed, or non-zero otherwise.
> 
>  VHOST_USER_PROTOCOL_F_REPLY_ACK:
>  ---
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index 44aea5c0a8..cc8a24aa31 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -46,6 +46,7 @@ enum VhostUserProtocolFeature {
>  VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
>  VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
>  VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
> +VHOST_USER_PROTOCOL_F_CONFIG = 9,
>  VHOST_USER_PROTOCOL_F_MAX
>  };
> 
> @@ -1211,6 +1212,17 @@ static int vhost_user_init(struct vhost_dev *dev, void
> *opaque)
> 
>  dev->protocol_features =
>  protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
> +
> +if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) 
> {
> +/* Dont acknowledge CONFIG feature if device doesn't support it 
> */
> +  

Re: [Qemu-devel] [PATCH v2 1/2] vhost-user-blk: set config ops before vhost-user init

2018-03-28 Thread Liu, Changpeng


> -Original Message-
> From: Maxime Coquelin [mailto:maxime.coque...@redhat.com]
> Sent: Thursday, March 29, 2018 3:28 AM
> To: m...@redhat.com; Liu, Changpeng ;
> marcandre.lur...@redhat.com; qemu-devel@nongnu.org
> Cc: Maxime Coquelin 
> Subject: [PATCH v2 1/2] vhost-user-blk: set config ops before vhost-user init
> 
> As soon as vhost-user init is done, the backend may send
> VHOST_USER_SLAVE_CONFIG_CHANGE_MSG, so let's set the
> notification callback before it.
> 
> Also, it will be used to know whether the device supports
> the config feature to advertize it or not.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  hw/block/vhost-user-blk.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index f840f07dfe..262baca432 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -259,6 +259,8 @@ static void vhost_user_blk_device_realize(DeviceState
> *dev, Error **errp)
>  s->dev.vq_index = 0;
>  s->dev.backend_features = 0;
> 
> +vhost_dev_set_config_notifier(&s->dev, &blk_ops);
Please also remove the line "assert(hdev->vhost_ops);" in function 
vhost_dev_set_config_notifier at vhost.c file.
> +
>  ret = vhost_dev_init(&s->dev, &s->chardev, VHOST_BACKEND_TYPE_USER, 0);
>  if (ret < 0) {
>  error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> @@ -277,8 +279,6 @@ static void vhost_user_blk_device_realize(DeviceState
> *dev, Error **errp)
>  s->blkcfg.num_queues = s->num_queues;
>  }
> 
> -vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> -
>  return;
> 
>  vhost_err:
> --
> 2.14.3




Re: [Qemu-devel] [PATCH v4 2/2] tpm: extend TPM TIS with state migration support

2018-03-28 Thread Stefan Berger

On 03/28/2018 11:41 AM, Marc-André Lureau wrote:

Hi

On Thu, Mar 1, 2018 at 8:59 PM, Stefan Berger
 wrote:

+
+static const VMStateDescription vmstate_locty = {
+.name = "loc",
+.version_id = 1,
+.minimum_version_id = 0,
+.minimum_version_id_old = 0,

I don't understand the problem there is leaving all the version fields
to 0, just like CRB.


+.fields  = (VMStateField[]) {
+VMSTATE_UINT32(state, TPMLocality),
+VMSTATE_UINT32(inte, TPMLocality),
+VMSTATE_UINT32(ints, TPMLocality),
+VMSTATE_UINT8(access, TPMLocality),
+VMSTATE_UINT32(sts, TPMLocality),
+VMSTATE_UINT32(iface_id, TPMLocality),
+VMSTATE_END_OF_LIST(),
+}
+};
+
  static const VMStateDescription vmstate_tpm_tis = {
  .name = "tpm",
-.unmigratable = 1,
+.version_id = 1,
+.minimum_version_id = 0,
+.minimum_version_id_old = 0,

same

If you remove the version fields: Reviewed-by: Marc-André Lureau



This is the error I got when setting .version_id = 0 (on both) and doing 
a localhost migration


qemu-system-x86_64: Missing section footer for tpm-tis
qemu-system-x86_64: load of migration failed: Invalid argument

It must have something to do with the nesting invoked by 
VMSTATE_STRUCT_ARRAY(loc,...) below.









+.pre_save  = tpm_tis_pre_save,
+.fields = (VMStateField[]) {
+VMSTATE_BUFFER(buffer, TPMState),
+VMSTATE_UINT16(rw_offset, TPMState),
+VMSTATE_UINT8(active_locty, TPMState),
+VMSTATE_UINT8(aborting_locty, TPMState),
+VMSTATE_UINT8(next_locty, TPMState),
+
+VMSTATE_STRUCT_ARRAY(loc, TPMState, TPM_TIS_NUM_LOCALITIES, 1,
+ vmstate_locty, TPMLocality),
+
+VMSTATE_END_OF_LIST()
+}
  };

  static Property tpm_tis_properties[] = {
--
2.5.5






Re: [Qemu-devel] [PATCH v2 5/6] e1000: Choose which set of props to migrate

2018-03-28 Thread Ed Swierk
On Wed, Mar 28, 2018 at 9:36 AM, Dr. David Alan Gilbert (git)
 wrote:
> From: "Dr. David Alan Gilbert" 
>
> When we're using the subsection we migrate both
> the 'props' and 'tso_props' data; when we're not using
> the subsection (to migrate to 2.11 or old machine types) we've
> got to choose what to migrate in the main structure.
>
> If we're using the subsection migrate 'props' in the main structure.
> If we're not using the subsection then migrate the last one
> that changed, which gives behaviour similar to the old behaviour.
>
> Signed-off-by: Dr. David Alan Gilbert 

Acked-by: Ed Swierk 

> ---
>  hw/net/e1000.c | 18 +-
>  1 file changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/hw/net/e1000.c b/hw/net/e1000.c
> index 4e606d4b2a..13a9494a8d 100644
> --- a/hw/net/e1000.c
> +++ b/hw/net/e1000.c
> @@ -130,6 +130,7 @@ typedef struct E1000State_st {
>  #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
>  uint32_t compat_flags;
>  bool received_tx_tso;
> +bool use_tso_for_migration;
>  e1000x_txd_props mig_props;
>  } E1000State;
>
> @@ -622,9 +623,11 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
>  if (dtype == E1000_TXD_CMD_DEXT) {/* context descriptor */
>  if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
>  e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
> +s->use_tso_for_migration = 1;
>  tp->tso_frames = 0;
>  } else {
>  e1000x_read_tx_ctx_descr(xp, &tp->props);
> +s->use_tso_for_migration = 0;
>  }
>  return;
>  } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
> @@ -1366,7 +1369,20 @@ static int e1000_pre_save(void *opaque)
>  s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
>  }
>
> -s->mig_props = s->tx.props;
> +/* Decide which set of props to migrate in the main structure */
> +if (chkflag(TSO) || !s->use_tso_for_migration) {
> +/* Either we're migrating with the extra subsection, in which
> + * case the mig_props is always 'props' OR
> + * we've not got the subsection, but 'props' was the last
> + * updated.
> + */
> +s->mig_props = s->tx.props;
> +} else {
> +/* We're not using the subsection, and 'tso_props' was
> + * the last updated.
> + */
> +s->mig_props = s->tx.tso_props;
> +}
>  return 0;
>  }
>
> --
> 2.14.3
>



Re: [Qemu-devel] [PATCH v2 09/14] hardfloat: support float32/64 multiplication

2018-03-28 Thread Emilio G. Cota
On Wed, Mar 28, 2018 at 14:26:30 +0100, Alex Bennée wrote:
> Emilio G. Cota  writes:
> OK I've had a bit more of a play and I think we can drop the macro abuse
> and have common wrappers for the host_fpu. We don't want to intermingle
> with the soft float slow path to stop the compiler adding overhead. We
> also need a wrapper for each float size and op count due to differences
> in the classify functions. However the boiler plate is pretty common and
> where there are differences the compiler is smart enough to fix it.
> 
> See branch:
> https://github.com/stsquad/qemu/tree/hostfloat/common-fpu-wrapper
> 
> I keep the numbers for add/sub and doubled the speed of float32_mul on
> my box, without any macros ;-)

I really like the idea of letting the compiler unfold everything.
In fact I just did that to re-implement fp-bench (now with support
for -t host/soft, yay).

> Full patch inline:
> 
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index d0f1f65c12..89217b5e67 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -879,56 +879,72 @@ soft_float64_sub(float64 a, float64 b, float_status 
> *status)
>  return float64_round_pack_canonical(pr, status);
>  }
(snip)
> +static float fpu_mul32(float a, float b, bool *nocheck) {
> +
> +if (float32_is_zero(a) || float32_is_zero(b)) {
> +bool signbit = float32_is_neg(a) ^ float32_is_neg(b);
> +*nocheck = true;
> +return float32_set_sign((0), signbit);
> +} else {
> +float ha = float32_to_float(a);
> +float hb = float32_to_float(b);
> +float hr = ha * hb;
> +return hr;
>  }
> +}

This function is wrong :-(

Note that a and b are floats, not float32's. So if any of
them is 0.X then they get silently converted to 0, which goes via the
fast(er) path above. This explains the speedup.

Note that you could have caught this with:

  $ ./fp-test -t soft ibm/* -w whitelist.txt -e x

Compiling with -Wconversion would also point these out, but the output
is way too noisy to be useful.


That said, I'll take inspiration from your approach for v3--hopefully
without (many) macros this time round.

Thanks!

Emilio



Re: [Qemu-devel] [PATCH qemu] vfio: Print address space address when cannot map MMIO for DMA

2018-03-28 Thread Alex Williamson
On Wed, 28 Mar 2018 23:03:23 +0200
Auger Eric  wrote:

> Hi Alexey, Alex,
> On 22/03/18 09:18, Alexey Kardashevskiy wrote:
> > The 567b5b309abe ("vfio/pci: Relax DMA map errors for MMIO regions") added
> > an error message if a passed memory section address or size is not aligned
> > to the minimal IOMMU page size. However although it checks
> > offset_within_address_space for the alignment, offset_within_region is
> > printed instead which makes it harder to find out what device caused
> > the message so this replaces offset_within_region with
> > offset_within_address_space.
> > 
> > While we are here, this replaces '..' with 'size=' (as the second number
> > is a size, not an end offset) and adds a memory region name.
> > 
> > Fixes: 567b5b309abe "vfio/pci: Relax DMA map errors for MMIO regions"
> > Signed-off-by: Alexey Kardashevskiy   
> The patch indeed fixes the reported format issues.
> 
> However I have some other concerns with the info that is reported to the
> end-user. See below.
> 
> Assigning an e1000e device with a 64kB host, here are the traces I get:
> 
> Region XXX is not aligned to 0x1 and cannot be mapped for DMA
> 
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0x3fb0
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0xffb0
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0x3fb0
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0xffb0
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0x3fb0
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100a4808 size=0xb7f8
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100e0050 size=0x3fb0
> "0004:01:00.0 BAR 3 mmaps[0]" 0x100e4808 size=0xb7f8
> 
> It took me some time to understand what happens but here is now my
> understanding:
> 
> 1) When looking at vfio_pci_write_config() pdev->io_regions[bar].addr =
> bar_addr in vfio_sub_page_bar_update_mapping() I see the following values:
> 
> UNMAPPED -> 0x0 ->UNMAPPED -> 0x100a -> UNMAPPED -> 0x100a ->
> UNMAPPED -> 0x100e
> 
> vfio_sub_page_bar_update_mapping() create mrs with base bar at
> 0x100a and 0x100e successively, hence the
> vfio_listener_region_add on 0x100a. Indeed, 0x0-0x50 msix-table mmio
> region induces some memory section at 0x100a0050 and 0x100e50 successively.
> 
> However this is confusing for the end-user who only has access to the
> final mapping (0x100e) through lspci [1].
> 
> 2) The changes in the size (0x3fb0 <-> 0xffb0) relate to the extension
> of the 16kB bar to 64kB in vfio_sub_page_bar_update_mapping
> 
> 3) Also it happens that I have a virtio-scsi-pci device that is put just
> after the BAR3 at 0x100a4000 and 0x100e4000 successively. The device has
> its own msi-table and pba mmio regions[2]. As mmaps[0] is extended to
> 64kB (with prio 0), we have those MMIO regions which result in new
> memory sections, which cause vfio_listener_region_add calls. This
> typically explains why we get a warning on 0x100e4808 (0xb7f8). By the
> way I don't get why we don't have a trace for "0004:01:00.0 BAR 3
> mmaps[0]" 0x100e4040 size=0x7c0, ie. mmaps[0] space between
> virtio-scsi-pci msix-table and pba.
> 
> So at the end of the day, my fear is all those info become really
> frightening and confusing for the end-user and even not relevant
> (0x100a stuff). So I would rather simply remove the trace in 2.12
> until we find a place where we could generate a clear hint for the
> end-user, suggesting to relocate the msix bar.
> 
> Thoughts?

Yep, I think that's probably the right approach.  Everything works as
it should and nothing has worse access in 2.12 than it did in 2.11,
there's only the opportunity to make things better with msi-x
relocation and I don't think we want to err on the side of reporting
too many errors that users cannot understand in an attempt to advise
them of an unsupported option that might be better.  Let's remove the
error report for 2.12 and think about how we could make a concise
suggestion, once, while initializing the device.  Who's posting the
patch?  Thanks,

Alex

PS - Why isn't the firmware/kernel on aarch64 making an attempt to
align PCI resources on page boundaries?  Does
pci=realloc,resource_alignment=pci:: change it? (I'm
not sure if PCI_ANY_ID works for that option)



Re: [Qemu-devel] [PATCH] i386/kvm: add support for KVM_CAP_X86_DISABLE_EXITS

2018-03-28 Thread Michael S. Tsirkin
On Wed, Mar 28, 2018 at 03:31:23PM -0300, Eduardo Habkost wrote:
> On Wed, Mar 28, 2018 at 03:06:23AM +0300, Michael S. Tsirkin wrote:
> > On Tue, Mar 27, 2018 at 06:36:46PM -0300, Eduardo Habkost wrote:
> > > On Tue, Mar 27, 2018 at 10:42:56PM +0300, Michael S. Tsirkin wrote:
> > > > On Fri, Mar 16, 2018 at 07:36:42AM -0700, Wanpeng Li wrote:
> > > > > From: Wanpeng Li 
> > > > > 
> > > > > This patch adds support for KVM_CAP_X86_DISABLE_EXITS. Provides 
> > > > > userspace with 
> > > > > per-VM capability(KVM_CAP_X86_DISABLE_EXITS) to not intercept 
> > > > > MWAIT/HLT/PAUSE 
> > > > > > in order to improve latency in some workloads.
> > > > > 
> > > [...]
> > > > > diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> > > > > index d23fff1..95ed9eb 100644
> > > > > --- a/target/i386/kvm.c
> > > > > +++ b/target/i386/kvm.c
> > > > > @@ -999,6 +999,18 @@ int kvm_arch_init_vcpu(CPUState *cs)
> > > > >  }
> > > > >  }
> > > > >  
> > > > > +if (env->features[FEAT_KVM_HINTS] & KVM_HINTS_DEDICATED) {
> > > > > +int disable_exits = kvm_check_extension(cs->kvm_state, 
> > > > > KVM_CAP_X86_DISABLE_EXITS);
> > > > > +if (disable_exits) {
> > > > > +disable_exits &= (KVM_X86_DISABLE_EXITS_MWAIT |
> > > > > +  KVM_X86_DISABLE_EXITS_HLT |
> > > > > +  KVM_X86_DISABLE_EXITS_PAUSE);
> > > > > +}
> > > > > +if (kvm_vm_enable_cap(cs->kvm_state, 
> > > > > KVM_CAP_X86_DISABLE_EXITS, 0, disable_exits)) {
> > > > > +error_report("kvm: DISABLE EXITS not supported");
> > > > > +}
> > > > > +}
> > > > > +
> > > > >  qemu_add_vm_change_state_handler(cpu_update_state, env);
> > > > >  
> > > > > >  c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0);
> > > > 
> > > > Why not a bit per capability?
> > > > I can see how someone might want to disable mwait exits
> > > > but not the rest of them.
> > > 
> > > kvm-hint-dedicated=on should be used only if the physical CPU is
> > > dedicated to the VCPU task.  Are there any advantages of getting
> > > vmexits for HLT and PAUSE if no other task is going to use the
> > > CPU?
> > 
> > No but there are advantages to using mwait even without a dedicated host
> > CPU (VCPUs can wake up each other without exiting to hypervisor).
> 
> Are there any downsides?  What needs to be taken into account
> when deciding if mwait exits can be safely disabled?

Exit might take longer as it might need to wake up CPU
from a deep C state.
So one needs to know which C states are enabled on host
and whether any tasks on the same CPU are latency sensitive.

> 
> > 
> > Which is my point - there should be a separate flag to disable mwait
> > exiting only.
> 
> Adding new command-line option is possible, but not necessary for
> the dedicated-CPU use case.  This means this patch is already
> useful without adding new flags.

True.

> -- 
> Eduardo



Re: [Qemu-devel] [PATCH v5 for 2.13 3/4] docs: tpm: add VM save/restore example and troubleshooting guide

2018-03-28 Thread Stefan Berger

On 03/28/2018 11:48 AM, Marc-André Lureau wrote:

Hi

On Fri, Mar 16, 2018 at 10:46 PM, Stefan Berger
 wrote:

Extend the docs related to TPM with specs related to VM save and
restore and a troubleshooting guide for TPM migration.


Thanks a lot for writing this! some questions below


Signed-off-by: Stefan Berger 
---
  docs/specs/tpm.txt | 106 +
  1 file changed, 106 insertions(+)

diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt
index d1d7157..c230c4c 100644
--- a/docs/specs/tpm.txt
+++ b/docs/specs/tpm.txt
@@ -200,3 +200,109 @@ crw---. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0
  PCR-00: 35 4E 3B CE 23 9F 38 59 ...
  ...
  PCR-23: 00 00 00 00 00 00 00 00 ...
+
+
+=== Migration with the TPM emulator ===
+
+The TPM emulator supports the following types of virtual machine migration:
+
+- VM save / restore (migration into a file)
+- Network migration
+- Snapshotting (migration into storage like QoW2 or QED)
+
+The following command sequences can be used to test VM save / restore.
+
+
+In a 1st terminal start an instance of a swtpm using the following command:
+
+mkdir /tmp/mytpm1
+swtpm socket --tpmstate dir=/tmp/mytpm1 \
+  --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \
+  --log level=20 --tpm2
+
+In a 2nd terminal start the VM:
+
+qemu-system-x86_64 -display sdl -enable-kvm \
+  -m 1024 -boot d -bios bios-256k.bin -boot menu=on \
+  -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \
+  -tpmdev emulator,id=tpm0,chardev=chrtpm \
+  -device tpm-tis,tpmdev=tpm0 \
+  -monitor stdio \
+  test.img
+
+Verify that the attached TPM is working as expected using applications inside
+the VM.
+
+To store the state of the VM use the following command in the QEMU monitor in
+the 2nd terminal:
+
+(qemu) migrate "exec:cat > testvm.bin"
+(qemu) quit
+
+At this point a file called 'testvm.bin' should exist and the swtpm and QEMU
+processes should have ended.

When is swtpm ending, when qemu leaves? Hopefully you can do several
migrate commands.


Yes, QEMU does not send it the signal to shut down. We can fall back to 
the source if the destination fails.





+
+To test 'VM restore' you have to start the swtpm with the same parameters
+as before. If previously a TPM 2 [--tpm2] was saved, --tpm2 must now be
+passed again on the command line.
+
+In the 1st terminal restart the swtpm with the same command line as before:
+
+swtpm socket --tpmstate dir=/tmp/mytpm1 \
+  --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \
+  --log level=20 --tpm2

Does that mean the tpmstate directory content must be the same and
thus migrated as well? Can it be empty in the destination? If not,
what should be done to initialize it? Could it be empty instead?


QEMU migrates the state of the TPM with the CMD_GET_STATEBLOB to 
retrieve the state blobs and CMD_SET_STATEBLOB to set them on the 
destination. The destination only needs to have the TPM running but the 
directory must have been created (--tpmstate dir=...).


One can try this with localhost migration over the network as well, but 
I didn't want to show this more complicated scenario in the doc:


destination QEMU:
sudo ./x86_64-softmmu/qemu-system-x86_64 -vnc :11 -enable-kvm -m 1024 
-smp 8 -boot d -L /usr/share/seabios -bios bios-256k.bin -boot menu=on 
-chardev socket,id=chrtpm,path=/tmp/mytpm2/ctrl.sock -tpmdev 
emulator,id=tpm0,chardev=chrtpm -device tpm-tis,tpmdev=tpm0 -monitor 
stdio -chardev file,id=pts2,path=/tmp/seabios.log -device 
isa-serial,chardev=pts2 /var/lib/libvirt/images/FC27 -incoming "exec:nc 
-l 127.0.0.1 12345"


source QEMU:
sudo ./x86_64-softmmu/qemu-system-x86_64 -vnc :10 -enable-kvm -m 1024 
-smp 8 -boot d -L /usr/share/seabios -bios bios-256k.bin -boot menu=on 
-chardev socket,id=chrtpm,path=/tmp/mytpm1/ctrl.sock -tpmdev 
emulator,id=tpm0,chardev=chrtpm -device tpm-tis,tpmdev=tpm0 -monitor 
stdio -chardev file,id=pts2,path=/tmp/seabios.log -device 
isa-serial,chardev=pts2  /var/lib/libvirt/images/FC27


(qemu) migrate "exec:nc 127.0.0.1 12345"

Just tested again.

Stefan



+
+In the 2nd terminal restore the state of the VM using the additional
+'-incoming' option.
+
+qemu-system-x86_64 -display sdl -enable-kvm \
+  -m 1024 -boot d -bios bios-256k.bin -boot menu=on \
+  -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \
+  -tpmdev emulator,id=tpm0,chardev=chrtpm \
+  -device tpm-tis,tpmdev=tpm0 \
+  -incoming "exec:cat < testvm.bin" \
+  test.img
+
+
+Troubleshooting migration:
+
+There are several reasons why migration may fail. In case of problems,
+please ensure that the command lines adhere to the following rules and,
+if possible, that identical versions of QEMU and swtpm are used at all
+times.
+
+VM save and restore:
+ - QEMU command line parameters should be identical apart from the
+   '-incoming' option on VM restore
+ - swtpm command line parameters should be identical
+
+VM migration to 'localhost':
+ - QEMU command 

Re: [Qemu-devel] [PATCH qemu] vfio: Print address space address when cannot map MMIO for DMA

2018-03-28 Thread Auger Eric
Hi Alexey, Alex,
On 22/03/18 09:18, Alexey Kardashevskiy wrote:
> The 567b5b309abe ("vfio/pci: Relax DMA map errors for MMIO regions") added
> an error message if a passed memory section address or size is not aligned
> to the minimal IOMMU page size. However although it checks
> offset_within_address_space for the alignment, offset_within_region is
> printed instead which makes it harder to find out what device caused
> the message so this replaces offset_within_region with
> offset_within_address_space.
> 
> While we are here, this replaces '..' with 'size=' (as the second number
> is a size, not an end offset) and adds a memory region name.
> 
> Fixes: 567b5b309abe "vfio/pci: Relax DMA map errors for MMIO regions"
> Signed-off-by: Alexey Kardashevskiy 
The patch indeed fixes the reported format issues.

However I have some other concerns with the info that is reported to the
end-user. See below.

Assigning an e1000e device with a 64kB host, here are the traces I get:

Region XXX is not aligned to 0x1 and cannot be mapped for DMA

"0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0x3fb0
"0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0xffb0
"0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0x3fb0
"0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0xffb0
"0004:01:00.0 BAR 3 mmaps[0]" 0x100a0050 size=0x3fb0
"0004:01:00.0 BAR 3 mmaps[0]" 0x100a4808 size=0xb7f8
"0004:01:00.0 BAR 3 mmaps[0]" 0x100e0050 size=0x3fb0
"0004:01:00.0 BAR 3 mmaps[0]" 0x100e4808 size=0xb7f8

It took me some time to understand what happens but here is now my
understanding:

1) When looking at vfio_pci_write_config() pdev->io_regions[bar].addr =
bar_addr in vfio_sub_page_bar_update_mapping() I see the following values:

UNMAPPED -> 0x0 ->UNMAPPED -> 0x100a -> UNMAPPED -> 0x100a ->
UNMAPPED -> 0x100e

vfio_sub_page_bar_update_mapping() create mrs with base bar at
0x100a and 0x100e successively, hence the
vfio_listener_region_add on 0x100a. Indeed, 0x0-0x50 msix-table mmio
region induces some memory section at 0x100a0050 and 0x100e50 successively.

However this is confusing for the end-user who only has access to the
final mapping (0x100e) through lspci [1].

2) The changes in the size (0x3fb0 <-> 0xffb0) relate to the extension
of the 16kB bar to 64kB in vfio_sub_page_bar_update_mapping

3) Also it happens that I have a virtio-scsi-pci device that is put just
after the BAR3 at 0x100a4000 and 0x100e4000 successively. The device has
its own msi-table and pba mmio regions[2]. As mmaps[0] is extended to
64kB (with prio 0), we have those MMIO regions which result in new
memory sections, which cause vfio_listener_region_add calls. This
typically explains why we get a warning on 0x100e4808 (0xb7f8). By the
way I don't get why we don't have a trace for "0004:01:00.0 BAR 3
mmaps[0]" 0x100e4040 size=0x7c0, ie. mmaps[0] space between
virtio-scsi-pci msix-table and pba.

So at the end of the day, my fear is all those info become really
frightening and confusing for the end-user and even not relevant
(0x100a stuff). So I would rather simply remove the trace in 2.12
until we find a place where we could generate a clear hint for the
end-user, suggesting to relocate the msix bar.

Thoughts?

Thanks

Eric


[1] lspci

Region 3: Memory at 104e (32-bit, non-prefetchable) [size=16K]
Expansion ROM at 1048 [disabled] [size=256K]
../..
Capabilities: [a0] MSI-X: Enable+ Count=5 Masked-
Vector table: BAR=3 offset=
  PBA: BAR=3 offset=2000

[2] info mtree (final)

100e-100e (prio 0, i/o): 0004:01:00.0 base BAR 3
 100e-100e004f (prio 0, i/o): msix-table
 100e-100e (prio 0, i/o): 0004:01:00.0 BAR 3
  100e-100e (prio 0, ramd): 0004:01:00.0 BAR 3 mmaps[0]
  100e2000-100e2007 (prio 0, i/o): msix-pba [disabled]
100e4000-100e4fff (prio 1, i/o): virtio-scsi-pci-msix
 100e4000-100e403f (prio 0, i/o): msix-table
 100e4800-100e4807 (prio 0, i/o): msix-pba







> ---
> 
> Message on slightly hacked QEMU (iommu pagesize=8K) looks now like this:
> 
> qemu-system-x86_64: Region ":00:1a.0 BAR 0 mmaps[0]" 0xfebc 
> size=0x1000 is not aligned to 0x2000 and cannot be mapped for DMA
> ---
>  hw/vfio/common.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 5e84716..e2db596 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -548,10 +548,11 @@ static void vfio_listener_region_add(MemoryListener 
> *listener,
>  hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
>  
>  if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
> -error_report("Region 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx
> +error_report("Region \"%s\" 0x%"HWADDR_PRIx" size=0x%"HWADDR_PRIx
>   " is not aligned to 0x%"HWADDR_PRIx
>   " and cannot be mapped 

[Qemu-devel] [PATCH 4/4] tests: Tests more flags of the CRB interface

2018-03-28 Thread Stefan Berger
Test and modify more flags of the CRB interface.

Signed-off-by: Stefan Berger 
---
 tests/tpm-crb-test.c | 74 ++--
 1 file changed, 72 insertions(+), 2 deletions(-)

diff --git a/tests/tpm-crb-test.c b/tests/tpm-crb-test.c
index e1513cb..d8f9569 100644
--- a/tests/tpm-crb-test.c
+++ b/tests/tpm-crb-test.c
@@ -28,6 +28,10 @@ static void tpm_crb_test(const void *data)
 uint64_t caddr = readq(TPM_CRB_ADDR_BASE + A_CRB_CTRL_CMD_LADDR);
 uint32_t rsize = readl(TPM_CRB_ADDR_BASE + A_CRB_CTRL_RSP_SIZE);
 uint64_t raddr = readq(TPM_CRB_ADDR_BASE + A_CRB_CTRL_RSP_ADDR);
+uint8_t locstate = readb(TPM_CRB_ADDR_BASE + A_CRB_LOC_STATE);
+uint32_t locctrl = readl(TPM_CRB_ADDR_BASE + A_CRB_LOC_CTRL);
+uint32_t locsts = readl(TPM_CRB_ADDR_BASE + A_CRB_LOC_STS);
+uint32_t sts = readl(TPM_CRB_ADDR_BASE + A_CRB_CTRL_STS);
 
 g_assert_cmpint(FIELD_EX32(intfid, CRB_INTF_ID, InterfaceType), ==, 1);
 g_assert_cmpint(FIELD_EX32(intfid, CRB_INTF_ID, InterfaceVersion), ==, 1);
@@ -45,9 +49,47 @@ static void tpm_crb_test(const void *data)
 g_assert_cmpint(caddr, >, TPM_CRB_ADDR_BASE);
 g_assert_cmpint(raddr, >, TPM_CRB_ADDR_BASE);
 
+g_assert_cmpint(FIELD_EX32(locstate, CRB_LOC_STATE, tpmEstablished), ==, 
1);
+g_assert_cmpint(FIELD_EX32(locstate, CRB_LOC_STATE, locAssigned), ==, 0);
+g_assert_cmpint(FIELD_EX32(locstate, CRB_LOC_STATE, activeLocality), ==, 
0);
+g_assert_cmpint(FIELD_EX32(locstate, CRB_LOC_STATE, reserved), ==, 0);
+g_assert_cmpint(FIELD_EX32(locstate, CRB_LOC_STATE, tpmRegValidSts), ==, 
1);
+
+g_assert_cmpint(locctrl, ==, 0);
+
+g_assert_cmpint(FIELD_EX32(locsts, CRB_LOC_STS, Granted), ==, 0);
+g_assert_cmpint(FIELD_EX32(locsts, CRB_LOC_STS, beenSeized), ==, 0);
+
+g_assert_cmpint(FIELD_EX32(sts, CRB_CTRL_STS, tpmIdle), ==, 1);
+g_assert_cmpint(FIELD_EX32(sts, CRB_CTRL_STS, tpmSts), ==, 0);
+
+/* request access to locality 0 */
+writeb(TPM_CRB_ADDR_BASE + A_CRB_LOC_CTRL, 1);
+
+/* granted bit must be set now */
+locsts = readl(TPM_CRB_ADDR_BASE + A_CRB_LOC_STS);
+g_assert_cmpint(FIELD_EX32(locsts, CRB_LOC_STS, Granted), ==, 1);
+g_assert_cmpint(FIELD_EX32(locsts, CRB_LOC_STS, beenSeized), ==, 0);
+
+/* we must have an assigned locality */
+locstate = readb(TPM_CRB_ADDR_BASE + A_CRB_LOC_STATE);
+g_assert_cmpint(FIELD_EX32(locstate, CRB_LOC_STATE, tpmEstablished), ==, 
1);
+g_assert_cmpint(FIELD_EX32(locstate, CRB_LOC_STATE, locAssigned), ==, 1);
+g_assert_cmpint(FIELD_EX32(locstate, CRB_LOC_STATE, activeLocality), ==, 
0);
+g_assert_cmpint(FIELD_EX32(locstate, CRB_LOC_STATE, reserved), ==, 0);
+g_assert_cmpint(FIELD_EX32(locstate, CRB_LOC_STATE, tpmRegValidSts), ==, 
1);
+
+/* set into ready state */
+writel(TPM_CRB_ADDR_BASE + A_CRB_CTRL_REQ, 1);
+
+/* TPM must not be in the idle state */
+sts = readl(TPM_CRB_ADDR_BASE + A_CRB_CTRL_STS);
+g_assert_cmpint(FIELD_EX32(sts, CRB_CTRL_STS, tpmIdle), ==, 0);
+g_assert_cmpint(FIELD_EX32(sts, CRB_CTRL_STS, tpmSts), ==, 0);
+
 memwrite(caddr, TPM_CMD, sizeof(TPM_CMD));
 
-uint32_t sts, start = 1;
+uint32_t start = 1;
 uint64_t end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
 writel(TPM_CRB_ADDR_BASE + A_CRB_CTRL_START, start);
 do {
@@ -58,12 +100,40 @@ static void tpm_crb_test(const void *data)
 } while (g_get_monotonic_time() < end_time);
 start = readl(TPM_CRB_ADDR_BASE + A_CRB_CTRL_START);
 g_assert_cmpint(start & 1, ==, 0);
+
+/* TPM must still not be in the idle state */
 sts = readl(TPM_CRB_ADDR_BASE + A_CRB_CTRL_STS);
-g_assert_cmpint(sts & 1, ==, 0);
+g_assert_cmpint(FIELD_EX32(sts, CRB_CTRL_STS, tpmIdle), ==, 0);
+g_assert_cmpint(FIELD_EX32(sts, CRB_CTRL_STS, tpmSts), ==, 0);
 
 struct tpm_hdr tpm_msg;
 memread(raddr, _msg, sizeof(tpm_msg));
 g_assert_cmpmem(_msg, sizeof(tpm_msg), s->tpm_msg, 
sizeof(*s->tpm_msg));
+
+/* set TPM into idle state */
+writel(TPM_CRB_ADDR_BASE + A_CRB_CTRL_REQ, 2);
+
+/* idle state must be indicated now */
+sts = readl(TPM_CRB_ADDR_BASE + A_CRB_CTRL_STS);
+g_assert_cmpint(FIELD_EX32(sts, CRB_CTRL_STS, tpmIdle), ==, 1);
+g_assert_cmpint(FIELD_EX32(sts, CRB_CTRL_STS, tpmSts), ==, 0);
+
+/* relinquish locality */
+writel(TPM_CRB_ADDR_BASE + A_CRB_LOC_CTRL, 2);
+
+/* Granted flag must be cleared */
+sts = readl(TPM_CRB_ADDR_BASE + A_CRB_LOC_STS);
+g_assert_cmpint(FIELD_EX32(sts, CRB_LOC_STS, Granted), ==, 0);
+g_assert_cmpint(FIELD_EX32(sts, CRB_LOC_STS, beenSeized), ==, 0);
+
+/* no locality may be assigned */
+locstate = readb(TPM_CRB_ADDR_BASE + A_CRB_LOC_STATE);
+g_assert_cmpint(FIELD_EX32(locstate, CRB_LOC_STATE, tpmEstablished), ==, 
1);
+g_assert_cmpint(FIELD_EX32(locstate, CRB_LOC_STATE, locAssigned), ==, 0);
+g_assert_cmpint(FIELD_EX32(locstate, 

[Qemu-devel] [PATCH 1/4] tpm: CRB: set the Idle flag by default

2018-03-28 Thread Stefan Berger
Signed-off-by: Stefan Berger 
---
 hw/tpm/tpm_crb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
index ef8b80e..e728b55 100644
--- a/hw/tpm/tpm_crb.c
+++ b/hw/tpm/tpm_crb.c
@@ -220,6 +220,8 @@ static void tpm_crb_reset(void *dev)
 
 ARRAY_FIELD_DP32(s->regs, CRB_LOC_STATE,
  tpmRegValidSts, 1);
+ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
+ tpmIdle, 1);
 ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
  InterfaceType, CRB_INTF_TYPE_CRB_ACTIVE);
 ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-- 
2.5.5




[Qemu-devel] [PATCH 0/4] tpm: More improvements on CRB interface

2018-03-28 Thread Stefan Berger
This patch improves yet more flags of the TPM CRB interface and adds
more tests to the CRB test case. Ideally we could have this for 2.12.
I tested it with UEFI and it works as before.

   Stefan

Stefan Berger (4):
  tpm: CRB: set the Idle flag by default
  tpm: CRB: Reset Granted flag when relinquishing locality
  tpm: CRB: Enforce locality is requested before processing buffer
  tests: Tests more flags of the CRB interface

 hw/tpm/tpm_crb.c | 17 +++-
 tests/tpm-crb-test.c | 74 ++--
 2 files changed, 88 insertions(+), 3 deletions(-)

-- 
2.5.5




[Qemu-devel] [PATCH 3/4] tpm: CRB: Enforce locality is requested before processing buffer

2018-03-28 Thread Stefan Berger
Section 5.5.3.2.2 of the CRB specs states that use of the TPM
through the locality control method must first be requested,
otherwise TPM commands will be dropped. This patch makes sure
that the current locality is the active locality and only then
sends off the command for processing.

Signed-off-by: Stefan Berger 
---
 hw/tpm/tpm_crb.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
index ee6c87e..520e74e 100644
--- a/hw/tpm/tpm_crb.c
+++ b/hw/tpm/tpm_crb.c
@@ -76,6 +76,8 @@ enum crb_cancel {
 CRB_CANCEL_INVOKE = BIT(0),
 };
 
+#define TPM_CRB_NO_LOCALITY 0xff
+
 static uint64_t tpm_crb_mmio_read(void *opaque, hwaddr addr,
   unsigned size)
 {
@@ -95,10 +97,18 @@ static uint64_t tpm_crb_mmio_read(void *opaque, hwaddr addr,
 return val;
 }
 
+static uint8_t tpm_crb_get_active_locty(CRBState *s)
+{
+if (!ARRAY_FIELD_EX32(s->regs, CRB_LOC_STATE, locAssigned))
+ return TPM_CRB_NO_LOCALITY;
+return ARRAY_FIELD_EX32(s->regs, CRB_LOC_STATE, activeLocality);
+}
+
 static void tpm_crb_mmio_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
 {
 CRBState *s = CRB(opaque);
+uint8_t locty =  addr >> 12;
 
 trace_tpm_crb_mmio_write(addr, size, val);
 
@@ -123,7 +133,8 @@ static void tpm_crb_mmio_write(void *opaque, hwaddr addr,
 break;
 case A_CRB_CTRL_START:
 if (val == CRB_START_INVOKE &&
-!(s->regs[R_CRB_CTRL_START] & CRB_START_INVOKE)) {
+!(s->regs[R_CRB_CTRL_START] & CRB_START_INVOKE) &&
+tpm_crb_get_active_locty(s) == locty) {
 void *mem = memory_region_get_ram_ptr(>cmdmem);
 
 s->regs[R_CRB_CTRL_START] |= CRB_START_INVOKE;
-- 
2.5.5




[Qemu-devel] [PATCH 2/4] tpm: CRB: Reset Granted flag when relinquishing locality

2018-03-28 Thread Stefan Berger
Reset the Granted flag when relinquishing a locality.

Signed-off-by: Stefan Berger 
---
 hw/tpm/tpm_crb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
index e728b55..ee6c87e 100644
--- a/hw/tpm/tpm_crb.c
+++ b/hw/tpm/tpm_crb.c
@@ -145,6 +145,8 @@ static void tpm_crb_mmio_write(void *opaque, hwaddr addr,
 case CRB_LOC_CTRL_RELINQUISH:
 ARRAY_FIELD_DP32(s->regs, CRB_LOC_STATE,
  locAssigned, 0);
+ARRAY_FIELD_DP32(s->regs, CRB_LOC_STS,
+ Granted, 0);
 break;
 case CRB_LOC_CTRL_REQUEST_ACCESS:
 ARRAY_FIELD_DP32(s->regs, CRB_LOC_STS,
-- 
2.5.5




Re: [Qemu-devel] [PATCH 2/2] target/riscv: Mark MSTATUS_FS dirty

2018-03-28 Thread Michael Clark
On Tue, Mar 27, 2018 at 7:22 PM, Richard Henderson <
richard.hender...@linaro.org> wrote:

> Writes to the FP register file mark the register file as dirty.
>
> Signed-off-by: Richard Henderson 
>

Reviewed-by: Michael Clark 
Tested-by: Michael Clark 


> ---
>  target/riscv/op_helper.c | 25 +
>  target/riscv/translate.c | 40 +++-
>  2 files changed, 56 insertions(+), 9 deletions(-)
>
> diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
> index e34715df4e..74eeef0be8 100644
> --- a/target/riscv/op_helper.c
> +++ b/target/riscv/op_helper.c
> @@ -72,11 +72,20 @@ void helper_raise_exception(CPURISCVState *env,
> uint32_t exception)
>  do_raise_exception_err(env, exception, 0);
>  }
>
> -static void validate_mstatus_fs(CPURISCVState *env, uintptr_t ra)
> +static void validate_mstatus_fs(CPURISCVState *env, uintptr_t ra, bool
> write)
>  {
>  #ifndef CONFIG_USER_ONLY
> -if (!(env->mstatus & MSTATUS_FS)) {
> +switch (get_field(env->mstatus, MSTATUS_FS)) {
> +case 0: /* disabled */
>  do_raise_exception_err(env, RISCV_EXCP_ILLEGAL_INST, ra);
> +g_assert_not_reached();
> +case 1: /* initial */
> +case 2: /* clean */
> +if (write) {
> +/* Mark fp status as dirty.  */
> +env->mstatus = MSTATUS_FS;
> +}
> +break;
>  }
>  #endif
>  }
> @@ -96,15 +105,15 @@ void csr_write_helper(CPURISCVState *env,
> target_ulong val_to_write,
>
>  switch (csrno) {
>  case CSR_FFLAGS:
> -validate_mstatus_fs(env, GETPC());
> +validate_mstatus_fs(env, GETPC(), true);
>  cpu_riscv_set_fflags(env, val_to_write & (FSR_AEXC >>
> FSR_AEXC_SHIFT));
>  break;
>  case CSR_FRM:
> -validate_mstatus_fs(env, GETPC());
> +validate_mstatus_fs(env, GETPC(), true);
>  env->frm = val_to_write & (FSR_RD >> FSR_RD_SHIFT);
>  break;
>  case CSR_FCSR:
> -validate_mstatus_fs(env, GETPC());
> +validate_mstatus_fs(env, GETPC(), true);
>  env->frm = (val_to_write & FSR_RD) >> FSR_RD_SHIFT;
>  cpu_riscv_set_fflags(env, (val_to_write & FSR_AEXC) >>
> FSR_AEXC_SHIFT);
>  break;
> @@ -379,13 +388,13 @@ target_ulong csr_read_helper(CPURISCVState *env,
> target_ulong csrno)
>
>  switch (csrno) {
>  case CSR_FFLAGS:
> -validate_mstatus_fs(env, GETPC());
> +validate_mstatus_fs(env, GETPC(), false);
>  return cpu_riscv_get_fflags(env);
>  case CSR_FRM:
> -validate_mstatus_fs(env, GETPC());
> +validate_mstatus_fs(env, GETPC(), false);
>  return env->frm;
>  case CSR_FCSR:
> -validate_mstatus_fs(env, GETPC());
> +validate_mstatus_fs(env, GETPC(), false);
>  return (cpu_riscv_get_fflags(env) << FSR_AEXC_SHIFT)
>  | (env->frm << FSR_RD_SHIFT);
>  /* rdtime/rdtimeh is trapped and emulated by bbl in system mode */
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index a30724aa90..08fc42a679 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -660,6 +660,31 @@ static void gen_store(DisasContext *ctx, uint32_t
> opc, int rs1, int rs2,
>  tcg_temp_free(dat);
>  }
>
> +#ifndef CONFIG_USER_ONLY
> +/* The states of mstatus_fs are:
> + * 0 = disabled, 1 = initial, 2 = clean, 3 = dirty
> + * We will have already diagnosed disabled state,
> + * and need to turn initial/clean into dirty.
> + */
> +static void mark_fs_dirty(DisasContext *ctx)
> +{
> +TCGv tmp;
> +if (ctx->mstatus_fs == MSTATUS_FS) {
> +return;
> +}
> +/* Remember the state change for the rest of the TB.  */
> +ctx->mstatus_fs = MSTATUS_FS;
> +
> +tmp = tcg_temp_new();
> +tcg_gen_ld_tl(tmp, cpu_env, offsetof(CPURISCVState, mstatus));
> +tcg_gen_ori_tl(tmp, tmp, MSTATUS_FS);
> +tcg_gen_st_tl(tmp, cpu_env, offsetof(CPURISCVState, mstatus));
> +tcg_temp_free(tmp);
> +}
> +#else
> +static inline void mark_fs_dirty(DisasContext *ctx) { }
> +#endif
> +
>  static void gen_fp_load(DisasContext *ctx, uint32_t opc, int rd,
>  int rs1, target_long imm)
>  {
> @@ -688,6 +713,8 @@ static void gen_fp_load(DisasContext *ctx, uint32_t
> opc, int rd,
>  break;
>  }
>  tcg_temp_free(t0);
> +
> +mark_fs_dirty(ctx);
>  }
>
>  static void gen_fp_store(DisasContext *ctx, uint32_t opc, int rs1,
> @@ -985,6 +1012,7 @@ static void gen_fp_arith(DisasContext *ctx, uint32_t
> opc, int rd,
>   int rs1, int rs2, int rm)
>  {
>  TCGv t0 = NULL;
> +bool fp_output = true;
>
>  if (ctx->mstatus_fs == 0) {
>  goto do_illegal;
> @@ -1047,6 +1075,7 @@ static void gen_fp_arith(DisasContext *ctx, uint32_t
> opc, int rd,
>  }
>  gen_set_gpr(rd, t0);
>  tcg_temp_free(t0);
> +fp_output = false;
>  break;
>

Re: [Qemu-devel] [PATCH] WHPX fixes an issue with CPUID 1 not returning CPUID_EXT_HYPERVISOR

2018-03-28 Thread Justin Terry (VM) via Qemu-devel
Hey Eduardo

Responses inline. Thanks!

> -Original Message-
> From: Eduardo Habkost 
> Sent: Wednesday, March 28, 2018 10:51 AM
> To: Justin Terry (VM) 
> Cc: qemu-devel@nongnu.org; pbonz...@redhat.com; r...@twiddle.net
> Subject: Re: [PATCH] WHPX fixes an issue with CPUID 1 not returning
> CPUID_EXT_HYPERVISOR
> 
> On Mon, Mar 26, 2018 at 10:06:58AM -0700, Justin Terry (VM) wrote:
> > Implements the CPUID trap for CPUID 1 to include the
> > CPUID_EXT_HYPERVISOR flag in the ECX results. This was preventing some
> > > older linux kernels from booting when trying to access MSR's that don't
> > make sense when virtualized.
> >
> > Signed-off-by: Justin Terry (VM) 
> > ---
> >  target/i386/whpx-all.c | 79
> > +-
> >  1 file changed, 78 insertions(+), 1 deletion(-)
> >
> > diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c index
> > bf33d320bf..58435178a4 100644
> > --- a/target/i386/whpx-all.c
> > +++ b/target/i386/whpx-all.c
> > @@ -911,12 +911,62 @@ static int whpx_vcpu_run(CPUState *cpu)
> >  ret = 1;
> >  break;
> >
> > +case WHvRunVpExitReasonX64Cpuid: {
> > +WHV_REGISTER_VALUE reg_values[5] = {0};
> > +WHV_REGISTER_NAME reg_names[5];
> > +UINT32 reg_count = 5;
> > +UINT64 rip, rax, rcx, rdx, rbx;
> > +
> > +rip = vcpu->exit_ctx.VpContext.Rip +
> > +  vcpu->exit_ctx.VpContext.InstructionLength;
> > +switch (vcpu->exit_ctx.CpuidAccess.Rax) {
> > +case 1:
> > +rax = vcpu->exit_ctx.CpuidAccess.DefaultResultRax;
> > +/* Advertise that we are running on a hypervisor */
> > +rcx =
> > +vcpu->exit_ctx.CpuidAccess.DefaultResultRcx |
> > +CPUID_EXT_HYPERVISOR;
> > +
> > +rdx = vcpu->exit_ctx.CpuidAccess.DefaultResultRdx;
> > +rbx = vcpu->exit_ctx.CpuidAccess.DefaultResultRbx;
> > +break;
> > +default:
> > +rax = vcpu->exit_ctx.CpuidAccess.DefaultResultRax;
> > +rcx = vcpu->exit_ctx.CpuidAccess.DefaultResultRcx;
> > +rdx = vcpu->exit_ctx.CpuidAccess.DefaultResultRdx;
> > +rbx = vcpu->exit_ctx.CpuidAccess.DefaultResultRbx;
> 
> Interesting, so the WHPX API already tries to provide default values for the
> CPUID leaves.  Would it make sense to try and use the values returned by
> cpu_x86_cpuid() in the future?
> 
> Is there a way to get the default CPUID results from the WHPX API without
> calling WHvRunVirtualProcessor(), so QEMU can be aware of what exactly
> the guest is seeing on CPUID?

The platform now has two ways to interact with CPUID.

1. (As the code is doing now). At partition creation time you can register for 
specific CPUID exits and then respond to the CPUID with your custom answer or 
with the Hypervisor defaults that were forwarded to you. Unfortunately, QEMU 
has no way to know the Hypervisor default ahead of time but QEMU can at 
least make a runtime decision about how to respond.
2. At partition creation time the platform allows QEMU to inject (set) the 
default responses for specific CPUID exits. This can now be done by setting the 
 `WHV_X64_CPUID_RESULT` in the `CpuidResultList` of `WHV_PARTITION_PROPERTY` to 
the exit values QEMU wants. So effectively you can know the answers ahead of 
time for any that you set but the answers are not dynamic.

The only issues/questions I have there are:

If we use [1] (like the code is now) I don't see any way to keep the exits in 
cpu_x86_cpuid() matched up with the registered exits to WHPX. This means that 
WHPX would not be called in these cases and would instead get the Hypervisor 
default rather than the answer from cpu_x86_cpuid().

If we use [2] to inject the answers at creation time WHPX needs access to the 
CPUX86State at accel init which also doesn't seem to be possible in QEMU today. 
WHPX could basically just call cpu_x86_cpuid() for each CPUID QEMU cares about 
and plumb the answer before start. This has the best performance as we avoid 
the additional exits but has an issue in that the results must be known ahead 
of time.

And, we could obviously use a hybrid of the two for cases we know. Do you have 
any ideas that I could try out here on how you would like to see this work?

Thanks,
Justin

> 
> 
> > +}
> > +
> > +reg_names[0] = WHvX64RegisterRip;
> > +reg_names[1] = WHvX64RegisterRax;
> > +reg_names[2] = WHvX64RegisterRcx;
> > +reg_names[3] = WHvX64RegisterRdx;
> > +reg_names[4] = WHvX64RegisterRbx;
> > +
> > +reg_values[0].Reg64 = rip;
> > +reg_values[1].Reg64 = rax;
> > +reg_values[2].Reg64 = rcx;
> > +reg_values[3].Reg64 = rdx;
> > +

Re: [Qemu-devel] [PATCH 1/2] target/riscv: Split out mstatus_fs from tb_flags during translation

2018-03-28 Thread Michael Clark
On Tue, Mar 27, 2018 at 7:22 PM, Richard Henderson <
richard.hender...@linaro.org> wrote:

> We will want to track changes to mstatus_fs through the TB.
> As there is nothing else in tb_flags at the moment, remove
> the variable from DisasContext.
>
> Signed-off-by: Richard Henderson 
>

Reviewed-by: Michael Clark 

---
>  target/riscv/cpu.h   |  6 +++---
>  target/riscv/translate.c | 10 +-
>  2 files changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 41e06ac0f9..d201dd3e90 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -269,8 +269,8 @@ void QEMU_NORETURN do_raise_exception_err(CPURISCVState
> *env,
>  target_ulong cpu_riscv_get_fflags(CPURISCVState *env);
>  void cpu_riscv_set_fflags(CPURISCVState *env, target_ulong);
>
> -#define TB_FLAGS_MMU_MASK  3
> -#define TB_FLAGS_FP_ENABLE MSTATUS_FS
> +#define TB_FLAGS_MMU_MASK   3
> +#define TB_FLAGS_MSTATUS_FS MSTATUS_FS
>
>  static inline void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong
> *pc,
>  target_ulong *cs_base, uint32_t
> *flags)
> @@ -278,7 +278,7 @@ static inline void cpu_get_tb_cpu_state(CPURISCVState
> *env, target_ulong *pc,
>  *pc = env->pc;
>  *cs_base = 0;
>  #ifdef CONFIG_USER_ONLY
> -*flags = TB_FLAGS_FP_ENABLE;
> +*flags = TB_FLAGS_MSTATUS_FS;
>  #else
>  *flags = cpu_mmu_index(env, 0) | (env->mstatus & MSTATUS_FS);
>  #endif
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 808eab7f50..a30724aa90 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -43,7 +43,7 @@ typedef struct DisasContext {
>  target_ulong pc;
>  target_ulong next_pc;
>  uint32_t opcode;
> -uint32_t flags;
> +uint32_t mstatus_fs;
>  uint32_t mem_idx;
>  int singlestep_enabled;
>  int bstate;
> @@ -665,7 +665,7 @@ static void gen_fp_load(DisasContext *ctx, uint32_t
> opc, int rd,
>  {
>  TCGv t0;
>
> -if (!(ctx->flags & TB_FLAGS_FP_ENABLE)) {
> +if (ctx->mstatus_fs == 0) {
>  gen_exception_illegal(ctx);
>  return;
>  }
> @@ -695,7 +695,7 @@ static void gen_fp_store(DisasContext *ctx, uint32_t
> opc, int rs1,
>  {
>  TCGv t0;
>
> -if (!(ctx->flags & TB_FLAGS_FP_ENABLE)) {
> +if (ctx->mstatus_fs == 0) {
>  gen_exception_illegal(ctx);
>  return;
>  }
> @@ -986,7 +986,7 @@ static void gen_fp_arith(DisasContext *ctx, uint32_t
> opc, int rd,
>  {
>  TCGv t0 = NULL;
>
> -if (!(ctx->flags & TB_FLAGS_FP_ENABLE)) {
> +if (ctx->mstatus_fs == 0) {
>  goto do_illegal;
>  }
>
> @@ -1862,8 +1862,8 @@ void gen_intermediate_code(CPUState *cs,
> TranslationBlock *tb)
>
>  ctx.tb = tb;
>  ctx.bstate = BS_NONE;
> -ctx.flags = tb->flags;
>  ctx.mem_idx = tb->flags & TB_FLAGS_MMU_MASK;
> +ctx.mstatus_fs = tb->flags & TB_FLAGS_MSTATUS_FS;
>  ctx.frm = -1;  /* unknown rounding mode */
>
>  num_insns = 0;
> --
> 2.14.3
>
>


[Qemu-devel] [PULL 1/2] RISC-V: Convert cpu definition to future model

2018-03-28 Thread Michael Clark
- Model borrowed from target/sh4/cpu.c
- Rewrote riscv_cpu_list to use object_class_get_list
- Dropped 'struct RISCVCPUInfo' and used TypeInfo array
- Replaced riscv_cpu_register_types with DEFINE_TYPES
- Marked base class as abstract
- Fixes -cpu list

Cc: Igor Mammedov 
Cc: Sagar Karandikar 
Cc: Bastian Koppelmann 
Cc: Palmer Dabbelt 
Signed-off-by: Michael Clark 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Igor Mammedov 
---
 target/riscv/cpu.c | 123 ++---
 1 file changed, 69 insertions(+), 54 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 9de34d7..5a527fb 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -115,6 +115,8 @@ static void riscv_any_cpu_init(Object *obj)
 set_resetvec(env, DEFAULT_RSTVEC);
 }
 
+#if defined(TARGET_RISCV32)
+
 static void rv32gcsu_priv1_09_1_cpu_init(Object *obj)
 {
 CPURISCVState *env = _CPU(obj)->env;
@@ -141,6 +143,8 @@ static void rv32imacu_nommu_cpu_init(Object *obj)
 set_resetvec(env, DEFAULT_RSTVEC);
 }
 
+#elif defined(TARGET_RISCV64)
+
 static void rv64gcsu_priv1_09_1_cpu_init(Object *obj)
 {
 CPURISCVState *env = _CPU(obj)->env;
@@ -167,20 +171,7 @@ static void rv64imacu_nommu_cpu_init(Object *obj)
 set_resetvec(env, DEFAULT_RSTVEC);
 }
 
-static const RISCVCPUInfo riscv_cpus[] = {
-{ 96, TYPE_RISCV_CPU_ANY,  riscv_any_cpu_init },
-{ 32, TYPE_RISCV_CPU_RV32GCSU_V1_09_1, rv32gcsu_priv1_09_1_cpu_init },
-{ 32, TYPE_RISCV_CPU_RV32GCSU_V1_10_0, rv32gcsu_priv1_10_0_cpu_init },
-{ 32, TYPE_RISCV_CPU_RV32IMACU_NOMMU,  rv32imacu_nommu_cpu_init },
-{ 32, TYPE_RISCV_CPU_SIFIVE_E31,   rv32imacu_nommu_cpu_init },
-{ 32, TYPE_RISCV_CPU_SIFIVE_U34,   rv32gcsu_priv1_10_0_cpu_init },
-{ 64, TYPE_RISCV_CPU_RV64GCSU_V1_09_1, rv64gcsu_priv1_09_1_cpu_init },
-{ 64, TYPE_RISCV_CPU_RV64GCSU_V1_10_0, rv64gcsu_priv1_10_0_cpu_init },
-{ 64, TYPE_RISCV_CPU_RV64IMACU_NOMMU,  rv64imacu_nommu_cpu_init },
-{ 64, TYPE_RISCV_CPU_SIFIVE_E51,   rv64imacu_nommu_cpu_init },
-{ 64, TYPE_RISCV_CPU_SIFIVE_U54,   rv64gcsu_priv1_10_0_cpu_init },
-{ 0, NULL, NULL }
-};
+#endif
 
 static ObjectClass *riscv_cpu_class_by_name(const char *cpu_model)
 {
@@ -366,28 +357,6 @@ static void riscv_cpu_class_init(ObjectClass *c, void 
*data)
 cc->vmsd = _riscv_cpu;
 }
 
-static void cpu_register(const RISCVCPUInfo *info)
-{
-TypeInfo type_info = {
-.name = info->name,
-.parent = TYPE_RISCV_CPU,
-.instance_size = sizeof(RISCVCPU),
-.instance_init = info->initfn,
-};
-
-type_register(_info);
-}
-
-static const TypeInfo riscv_cpu_type_info = {
-.name = TYPE_RISCV_CPU,
-.parent = TYPE_CPU,
-.instance_size = sizeof(RISCVCPU),
-.instance_init = riscv_cpu_init,
-.abstract = false,
-.class_size = sizeof(RISCVCPUClass),
-.class_init = riscv_cpu_class_init,
-};
-
 char *riscv_isa_string(RISCVCPU *cpu)
 {
 int i;
@@ -403,30 +372,76 @@ char *riscv_isa_string(RISCVCPU *cpu)
 return isa_str;
 }
 
-void riscv_cpu_list(FILE *f, fprintf_function cpu_fprintf)
+typedef struct RISCVCPUListState {
+fprintf_function cpu_fprintf;
+FILE *file;
+} RISCVCPUListState;
+
+static gint riscv_cpu_list_compare(gconstpointer a, gconstpointer b)
 {
-const RISCVCPUInfo *info = riscv_cpus;
+ObjectClass *class_a = (ObjectClass *)a;
+ObjectClass *class_b = (ObjectClass *)b;
+const char *name_a, *name_b;
 
-while (info->name) {
-if (info->bit_widths & TARGET_LONG_BITS) {
-(*cpu_fprintf)(f, "%s\n", info->name);
-}
-info++;
-}
+name_a = object_class_get_name(class_a);
+name_b = object_class_get_name(class_b);
+return strcmp(name_a, name_b);
 }
 
-static void riscv_cpu_register_types(void)
+static void riscv_cpu_list_entry(gpointer data, gpointer user_data)
 {
-const RISCVCPUInfo *info = riscv_cpus;
+RISCVCPUListState *s = user_data;
+const char *typename = object_class_get_name(OBJECT_CLASS(data));
+int len = strlen(typename) - strlen(RISCV_CPU_TYPE_SUFFIX);
 
-type_register_static(_cpu_type_info);
+(*s->cpu_fprintf)(s->file, "%.*s\n", len, typename);
+}
 
-while (info->name) {
-if (info->bit_widths & TARGET_LONG_BITS) {
-cpu_register(info);
-}
-info++;
-}
+void riscv_cpu_list(FILE *f, fprintf_function cpu_fprintf)
+{
+RISCVCPUListState s = {
+.cpu_fprintf = cpu_fprintf,
+.file = f,
+};
+GSList *list;
+
+list = object_class_get_list(TYPE_RISCV_CPU, false);
+list = g_slist_sort(list, riscv_cpu_list_compare);
+g_slist_foreach(list, riscv_cpu_list_entry, );
+g_slist_free(list);
 }
 
-type_init(riscv_cpu_register_types)
+#define DEFINE_CPU(type_name, initfn)   

[Qemu-devel] [PULL 0/2] RISC-V: Important fixes for QEMU 2.12

2018-03-28 Thread Michael Clark
The following changes since commit 043289bef4d9c0d277c45695c676a6cc9fca48a0:

  Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20180328' into 
staging (2018-03-28 13:30:10 +0100)

are available in the git repository at:

  https://github.com/riscv/riscv-qemu.git tags/riscv-qemu-2.12-important-fixes

for you to fetch changes up to 33b4f859f1e1ea6722d10c3e9c0e3d85afb44ff4:

  RISC-V: Fix incorrect disassembly for addiw (2018-03-28 11:12:02 -0700)


RISC-V: Important fixes for QEMU 2.12

This series includes changes that are considered important.
i.e. correct user-visible bugs that are exercised by common
operations such as -cpu list (CPU model changes) or -d in_asm
(fix for disassembly of addiw)


Michael Clark (2):
  RISC-V: Convert cpu definition to future model
  RISC-V: Fix incorrect disassembly for addiw

 disas/riscv.c  |   2 +-
 target/riscv/cpu.c | 123 
+---
 2 files changed, 70 insertions(+), 55 deletions(-)



[Qemu-devel] [PULL 2/2] RISC-V: Fix incorrect disassembly for addiw

2018-03-28 Thread Michael Clark
This fixes a bug in the disassembler constraints used
to lift instructions into pseudo-instructions, whereby
addiw instructions are always lifted to sext.w instead
of just lifting addiw with a zero immediate.

An associated fix has been made to the metadata used to
machine-generate the disassembler:

https://github.com/michaeljclark/riscv-meta/
commit/4a6b2f3898430768acfe201405224d2ea31e1477

Cc: Sagar Karandikar 
Cc: Bastian Koppelmann 
Cc: Palmer Dabbelt 
Cc: Peter Maydell 
Signed-off-by: Michael Clark 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
---
 disas/riscv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/disas/riscv.c b/disas/riscv.c
index 3c17501..74ad16e 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -600,7 +600,7 @@ static const rvc_constraint rvcc_mv[] = { rvc_imm_eq_zero, 
rvc_end };
 static const rvc_constraint rvcc_not[] = { rvc_imm_eq_n1, rvc_end };
 static const rvc_constraint rvcc_neg[] = { rvc_rs1_eq_x0, rvc_end };
 static const rvc_constraint rvcc_negw[] = { rvc_rs1_eq_x0, rvc_end };
-static const rvc_constraint rvcc_sext_w[] = { rvc_rs2_eq_x0, rvc_end };
+static const rvc_constraint rvcc_sext_w[] = { rvc_imm_eq_zero, rvc_end };
 static const rvc_constraint rvcc_seqz[] = { rvc_imm_eq_p1, rvc_end };
 static const rvc_constraint rvcc_snez[] = { rvc_rs1_eq_x0, rvc_end };
 static const rvc_constraint rvcc_sltz[] = { rvc_rs2_eq_x0, rvc_end };
-- 
2.7.0




Re: [Qemu-devel] [PATCH v4 5/9] qapi: introduce new cmd option "allowed-in-preconfig"

2018-03-28 Thread Eduardo Habkost
On Wed, Mar 28, 2018 at 02:29:57PM +0200, Igor Mammedov wrote:
> On Fri, 23 Mar 2018 18:28:37 -0300
> Eduardo Habkost  wrote:
> 
> > On Mon, Mar 12, 2018 at 02:11:11PM +0100, Igor Mammedov wrote:
> > > New option will be used to allow commands, which are prepared/need
> > > to run in preconfig state. Other commands that should be able
> > > to run in preconfig state, should be amended to not expect machine
> > > in initialized state or deal with it.
> > > 
> > > For compatibility reasons, commands, that don't use new flag
> > > 'allowed-in-preconfig' explicitly, are not permitted to run in
> > > preconfig state but allowed in all other states like they used
> > > to be.
> > > 
> > > Within this patch allow following commands in preconfig state:
> > >qmp_capabilities
> > >query-qmp-schema
> > >query-commands
> > >query-status
> > >cont
> > > to allow qmp connection, basic introspection and moving to the next
> > > state.
> > > 
> > > PS:
> > > set-numa-node and query-hotpluggable-cpus will be enabled later in
> > > a separate patch.
> > > 
> > > Signed-off-by: Igor Mammedov   
> > 
> > I didn't review the code yet, but:
> > 
> > Shouldn't this be applied before patch 3/9, for bisectability?
> > Otherwise it will be very easy to crash QEMU after applying patch
> > 3/9.
> no, it isn't going to work.
> This patch depends on RUN_STATE_PRECONFIG that is introduced in 3/9.
> 
> It could be fine to merge into 3/9 during merge, but then history
> wise it would be difficult to read it later with 2 big and mostly
> separate changes within one patch.

Yeah, I don't think squashing would be the right answer.

> 
> Considering -preconfig is off by default it shouldn't affect
> bisectability in general so I'd keep current patch order.

Well, it would affect bisectability if debugging a crash that
happens using -preconfig.

The only hunk in this patch that really depends on patch 3/9
seems to be:

  @@ -92,6 +93,13 @@ static QObject *do_qmp_dispatch(QmpCommandList *cmds, 
QObject *request,
   return NULL;
   }
  
  +if (runstate_check(RUN_STATE_PRECONFIG) &&
  +!(cmd->options & QCO_ALLOWED_IN_PRECONFIG)) {
  +error_setg(errp, "The command '%s' isn't permitted in '%s' state",
  +   cmd->name, RunState_str(RUN_STATE_PRECONFIG));
  +return NULL;
  +}
  +
   if (!qdict_haskey(dict, "arguments")) {
   args = qdict_new();
   } else {
  

What about moving it to patch 3/9?


Or, an alternative is to move the following hunk from patch 3/9 to this patch:

  diff --git a/qapi/run-state.json b/qapi/run-state.json
  index 1c9fff3aef..ce846a570e 100644
  --- a/qapi/run-state.json
  +++ b/qapi/run-state.json
  @@ -49,12 +49,15 @@
   # @colo: guest is paused to save/restore VM state under colo checkpoint,
   #VM can not get into this state unless colo capability is enabled
   #for migration. (since 2.8)
  +# @preconfig: QEMU is paused before board specific init callback is executed.
  +# The state is reachable only if -preconfig CLI option is used.
  +# (Since 2.12)
   ##
   { 'enum': 'RunState',
 'data': [ 'debug', 'inmigrate', 'internal-error', 'io-error', 'paused',
   'postmigrate', 'prelaunch', 'finish-migrate', 'restore-vm',
   'running', 'save-vm', 'shutdown', 'suspended', 'watchdog',
  -'guest-panicked', 'colo' ] }
  +'guest-panicked', 'colo', 'preconfig' ] }
  
   ##
   # @StatusInfo:

Which could be an interesting idea, because the QAPI schema
changes would be all grouped inside a single patch, and then
followed by the actual implementation of the -preconfig option.

-- 
Eduardo




[Qemu-devel] [PATCH v2 2/2] vhost-user: back SET/GET_CONFIG requests with a protocol feature

2018-03-28 Thread Maxime Coquelin
Without a dedicated protocol feature, QEMU cannot know whether
the backend can handle VHOST_USER_SET_CONFIG and
VHOST_USER_GET_CONFIG messages.

This patch adds a protocol feature that is only advertised by
QEMU if the device implements the config ops. Vhost user init
fails if the device supports the feature but the backend doesn't.

The backend should only send VHOST_USER_SLAVE_CONFIG_CHANGE_MSG
requests if the protocol feature has been negotiated.

Signed-off-by: Maxime Coquelin 
---
 docs/interop/vhost-user.txt | 21 -
 hw/virtio/vhost-user.c  | 22 ++
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
index c058c407df..534caab18a 100644
--- a/docs/interop/vhost-user.txt
+++ b/docs/interop/vhost-user.txt
@@ -379,6 +379,7 @@ Protocol features
 #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN   6
 #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
 #define VHOST_USER_PROTOCOL_F_PAGEFAULT  8
+#define VHOST_USER_PROTOCOL_F_CONFIG 9
 
 Master message types
 
@@ -664,7 +665,8 @@ Master message types
   Master payload: virtio device config space
   Slave payload: virtio device config space
 
-  Submitted by the vhost-user master to fetch the contents of the virtio
+  When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, this message is
+  submitted by the vhost-user master to fetch the contents of the virtio
   device configuration space, vhost-user slave's payload size MUST match
   master's request, vhost-user slave uses zero length of payload to
   indicate an error to vhost-user master. The vhost-user master may
@@ -677,7 +679,8 @@ Master message types
   Master payload: virtio device config space
   Slave payload: N/A
 
-  Submitted by the vhost-user master when the Guest changes the virtio
+  When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, this message is
+  submitted by the vhost-user master when the Guest changes the virtio
   device configuration space and also can be used for live migration
   on the destination host. The vhost-user slave must check the flags
   field, and slaves MUST NOT accept SET_CONFIG for read-only
@@ -766,13 +769,13 @@ Slave message types
  Slave payload: N/A
  Master payload: N/A
 
- Vhost-user slave sends such messages to notify that the virtio device's
- configuration space has changed, for those host devices which can support
- such feature, host driver can send VHOST_USER_GET_CONFIG message to slave
- to get the latest content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is
- negotiated, and slave set the VHOST_USER_NEED_REPLY flag, master must
- respond with zero when operation is successfully completed, or non-zero
- otherwise.
+ When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, vhost-user slave sends
+ such messages to notify that the virtio device's configuration space has
+ changed, for those host devices which can support such feature, host
+ driver can send VHOST_USER_GET_CONFIG message to slave to get the latest
+ content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, and slave set
+ the VHOST_USER_NEED_REPLY flag, master must respond with zero when
+ operation is successfully completed, or non-zero otherwise.
 
 VHOST_USER_PROTOCOL_F_REPLY_ACK:
 ---
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 44aea5c0a8..cc8a24aa31 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -46,6 +46,7 @@ enum VhostUserProtocolFeature {
 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
+VHOST_USER_PROTOCOL_F_CONFIG = 9,
 VHOST_USER_PROTOCOL_F_MAX
 };
 
@@ -1211,6 +1212,17 @@ static int vhost_user_init(struct vhost_dev *dev, void 
*opaque)
 
 dev->protocol_features =
 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
+
+if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
+/* Dont acknowledge CONFIG feature if device doesn't support it */
+dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+} else if (!(protocol_features &
+(1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
+error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG "
+"but backend does not support it.");
+return -1;
+}
+
 err = vhost_user_set_protocol_features(dev, dev->protocol_features);
 if (err < 0) {
 return err;
@@ -1405,6 +1417,11 @@ static int vhost_user_get_config(struct vhost_dev *dev, 
uint8_t *config,
 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
 };
 
+if (!virtio_has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_CONFIG)) {
+return -1;

[Qemu-devel] [PATCH v2 0/2] vhost-user: Back SET/GET_CONFIG with a protocol feature

2018-03-28 Thread Maxime Coquelin

V2 makes vhost-user init fail if the device implements the config
feature but the backend doesn't.

While reviewing DPDK series adding support to VHOST_USER_SET_CONFIG
and VHOST_USER_GET_CONFIG request, I found that it was not backed
with a dedicated protocol feature.

This series addresses this by adding a new protocol feature bit,
and by only negotiating it if the device supports it, as suggested
by Michael. Indeed, if the feature is supported by other types of
devices in the future, it would confuse the backends, as they couldn't
know whether the device really supports it or not.

To know whether the vhost device supports the config feature, the trick
is to check whether it implements the config_ops. That's the
reason why the first patch moves setting the config ops in
vhost-user-blk before calling vhost_user_init().

The series targets v2.12 release, else we may have to disable these
requests in this release.

*NOTE*: The series has only been build-tested, as I don't have the
environment to try it. Changpeng, can you please test it?

Thanks,
Maxime

Changes since v1:
=
 - Fail vhost-user init if device implements config
   feature but the backend doesn't. (mst)

Maxime Coquelin (2):
  vhost-user-blk: set config ops before vhost-user init
  vhost-user: back SET/GET_CONFIG requests with a protocol feature

 docs/interop/vhost-user.txt | 21 -
 hw/block/vhost-user-blk.c   |  4 ++--
 hw/virtio/vhost-user.c  | 22 ++
 3 files changed, 36 insertions(+), 11 deletions(-)

-- 
2.14.3




[Qemu-devel] [PATCH v2 1/2] vhost-user-blk: set config ops before vhost-user init

2018-03-28 Thread Maxime Coquelin
As soon as vhost-user init is done, the backend may send
VHOST_USER_SLAVE_CONFIG_CHANGE_MSG, so let's set the
notification callback before it.

Also, it will be used to know whether the device supports
the config feature to advertize it or not.

Signed-off-by: Maxime Coquelin 
---
 hw/block/vhost-user-blk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index f840f07dfe..262baca432 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -259,6 +259,8 @@ static void vhost_user_blk_device_realize(DeviceState *dev, 
Error **errp)
 s->dev.vq_index = 0;
 s->dev.backend_features = 0;
 
+vhost_dev_set_config_notifier(>dev, _ops);
+
 ret = vhost_dev_init(>dev, >chardev, VHOST_BACKEND_TYPE_USER, 0);
 if (ret < 0) {
 error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
@@ -277,8 +279,6 @@ static void vhost_user_blk_device_realize(DeviceState *dev, 
Error **errp)
 s->blkcfg.num_queues = s->num_queues;
 }
 
-vhost_dev_set_config_notifier(>dev, _ops);
-
 return;
 
 vhost_err:
-- 
2.14.3




Re: [Qemu-devel] [PATCH v4 3/9] cli: add -preconfig option

2018-03-28 Thread Eduardo Habkost
On Wed, Mar 28, 2018 at 01:48:35PM +0200, Igor Mammedov wrote:
> On Tue, 27 Mar 2018 17:05:41 +0200
> Igor Mammedov  wrote:
> 
> > On Fri, 23 Mar 2018 18:25:08 -0300
> > Eduardo Habkost  wrote:
> > 
> > > On Mon, Mar 12, 2018 at 02:11:09PM +0100, Igor Mammedov wrote:  
> > [...]
> [...]
> > > > @@ -1886,6 +1895,13 @@ static bool main_loop_should_exit(void)
> > > >  RunState r;
> > > >  ShutdownCause request;
> > > >  
> > > > +if (preconfig_exit_requested) {
> > > > +if (runstate_check(RUN_STATE_PRECONFIG)) {
> > > 
> > > Is it possible to have preconfig_exit_request set outside of
> > > RUN_STATE_PRECONFIG?  When and why?  
> > preconfig_exit_requested is initialized with TRUE and
> > in combo with '-inmigrate' we need this runstate check.
> > it's the same as it was with
> >  { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
> > which I probably should remove (I need to check it though)
> [...]
> 
> > > > @@ -4594,6 +4623,10 @@ int main(int argc, char **argv, char **envp)
> > > >  }
> > > >  parse_numa_opts(current_machine);
> > > >  
> > > > +/* do monitor/qmp handling at preconfig state if requested */
> > > > +main_loop();
> > > 
> > > Wouldn't it be simpler to do "if (!preconfig) { main_loop(); }"
> > > instead of entering main_loop() just to exit immediately?  
> > The thought didn't cross my mind, it might work and be more readable
> > as one doesn't have to jump into main_loop() to find out that
> > it would exit immediately.
> > I'll try it on respin.
> Well, doing as suggested ends up more messy:
> 
> @@static bool main_loop_should_exit(void)
> ...
> if (preconfig_exit_requested) {
> runstate_set(RUN_STATE_PRELAUNCH);
> 
> return true;
> }
>
> @@main
> /* do monitor/qmp handling at preconfig state if requested */
> if (!preconfig_exit_requested) {
> main_loop();
> } else if (runstate_check(RUN_STATE_PRECONFIG)) {
> runstate_set(RUN_STATE_PRELAUNCH);
> }

This doesn't make sense to me.  Why would we enter
RUN_STATE_PRECONFIG state if -preconfig is not used at all?


> preconfig_exit_requested = false;
> ...
> 
> I'd prefer original v4 approach, where only main_loop_should_exit()
> has to deal with state transitions and book-keeping.

If the above is unavoidable, I agree.  But I still don't
understand why we have to enter PRECONFIG state if the user didn't
specify -preconfig.

-- 
Eduardo



Re: [Qemu-devel] [PATCH v4 3/9] cli: add -preconfig option

2018-03-28 Thread Eduardo Habkost
On Tue, Mar 27, 2018 at 05:05:41PM +0200, Igor Mammedov wrote:
> On Fri, 23 Mar 2018 18:25:08 -0300
> Eduardo Habkost  wrote:
> 
> > On Mon, Mar 12, 2018 at 02:11:09PM +0100, Igor Mammedov wrote:
> [...]
> > > diff --git a/vl.c b/vl.c
> > > index 3ef04ce..69b1997 100644
> > > --- a/vl.c
> > > +++ b/vl.c
> > > @@ -593,7 +593,7 @@ static int default_driver_check(void *opaque, 
> > > QemuOpts *opts, Error **errp)
> > >  /***/
> > >  /* QEMU state */
> > >  
> > > -static RunState current_run_state = RUN_STATE_PRELAUNCH;
> > > +static RunState current_run_state = RUN_STATE_PRECONFIG;
> > >  
> > >  /* We use RUN_STATE__MAX but any invalid value will do */
> > >  static RunState vmstop_requested = RUN_STATE__MAX;
> > > @@ -606,6 +606,9 @@ typedef struct {
> > >  
> > >  static const RunStateTransition runstate_transitions_def[] = {
> > >  /* from  -> to  */
> > > +{ RUN_STATE_PRECONFIG, RUN_STATE_PRELAUNCH },
> > > +{ RUN_STATE_PRECONFIG, RUN_STATE_INMIGRATE },  
> > 
> > > Doesn't this mean -preconfig and -incoming could work together?
> theoretically yes, but it's not the reason why this transition is here.
> It's mimicking existing approach where initial state
>{ RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
> were allowed to move to the next possible (including RUN_STATE_INMIGRATE)

I still don't get it.  Where does this definition of "next possible"
come from?  If -incoming and -preconfig don't work together, why
is the PRECONFIG -> INMIGRATE transition considered possible?


> 
> > > +
> > >  { RUN_STATE_DEBUG, RUN_STATE_RUNNING },
> > >  { RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE },
> > >  { RUN_STATE_DEBUG, RUN_STATE_PRELAUNCH },
> > > @@ -1629,6 +1632,7 @@ static pid_t shutdown_pid;
> > >  static int powerdown_requested;
> > >  static int debug_requested;
> > >  static int suspend_requested;
> > > +static bool preconfig_exit_requested = true;
> > >  static WakeupReason wakeup_reason;
> > >  static NotifierList powerdown_notifiers =
> > >  NOTIFIER_LIST_INITIALIZER(powerdown_notifiers);
> > > @@ -1713,6 +1717,11 @@ static int qemu_debug_requested(void)
> > >  return r;
> > >  }
> > >  
> > > +void qemu_exit_preconfig_request(void)
> > > +{
> > > +preconfig_exit_requested = true;
> > > +}
> > > +
> > >  /*
> > >   * Reset the VM. Issue an event unless @reason is SHUTDOWN_CAUSE_NONE.
> > >   */
> > > @@ -1886,6 +1895,13 @@ static bool main_loop_should_exit(void)
> > >  RunState r;
> > >  ShutdownCause request;
> > >  
> > > +if (preconfig_exit_requested) {
> > > +if (runstate_check(RUN_STATE_PRECONFIG)) {  
> > 
> > Is it possible to have preconfig_exit_request set outside of
> > RUN_STATE_PRECONFIG?  When and why?
> preconfig_exit_requested is initialized with TRUE and
> in combo with '-inmigrate' we need this runstate check.

I think this now makes sense to me.  It still looks confusing,
but I don't have a better suggestion right now.

Except...

Why exactly do you need to use main_loop() and
main_loop_should_exit() for the preconfig loop?  What about a
separate preconfig_loop() and preconfig_loop_should_exit()
function?


> it's the same as it was with
>  { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
> which I probably should remove (I need to check it though)
> 
> > > +runstate_set(RUN_STATE_PRELAUNCH);
> > > +}
> > > +preconfig_exit_requested = false;

What happens if we don't set preconfig_exit_requested=false here?


> > > +return true;
> > > +}
> > >  if (qemu_debug_requested()) {
> > >  vm_stop(RUN_STATE_DEBUG);
> > >  }
> > > @@ -3697,6 +3713,14 @@ int main(int argc, char **argv, char **envp)
> > >  exit(1);
> > >  }
> > >  break;
> > > +case QEMU_OPTION_preconfig:
> > > +if (runstate_check(RUN_STATE_INMIGRATE)) {
> > > +error_report("option can not be used with "
> > > + "-incoming option");
> > > +exit(EXIT_FAILURE);
> > > +}  
> > 
> > So -incoming changes runstate as soon as the option is parsed?
> > 
> > Ouch.
> yep and it's rather fragile (it's well out of scope of
> this series to re-factor this, so I'm not changing it here)
> 
> > I would rather not rely on that behavior and just do
> > "if (incoming)".
> > 
> > Why exactly it's not possible to use -incoming with -preconfig?
> there are 2 reasons why I made options mutually exclusive
> 1. (excuse ) '-incoming' is an option with non explicit side effects
>on other parts of code. It's hard to predict behavior
>of preconfig commands in combination with inmigrate.
>I wouldn't try to touch/change anything related to it
>in this series.
>If we need to change how option is handled, it should
>be separate series that focuses on it.
> 2. (main reason) is to expose as 

[Qemu-devel] [PATCH 0/1] WHPX Add signature CPUID

2018-03-28 Thread Alessandro Pilotti
Add support for CPUID 0x4000 in WHPX, requiring Justin Terry's patch
that adds support for CPUID 1.

Based-on: <20180326170658.606-1-jute...@microsoft.com>

Alessandro Pilotti (1):
  WHPX Add signature CPUID

 target/i386/whpx-all.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

--
2.13.2




[Qemu-devel] [PATCH 1/1] WHPX Add signature CPUID

2018-03-28 Thread Alessandro Pilotti
Adds the CPUID trap for CPUID 0x4000, sending the WHPX signature
to the guest upon request. This is consistent with other QEMU
accelerators (KVM).

Signed-off-by: Alessandro Pilotti 
---
 target/i386/whpx-all.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c
index efa1441479..4085002428 100644
--- a/target/i386/whpx-all.c
+++ b/target/i386/whpx-all.c
@@ -29,6 +29,8 @@
 #include 
 #include 
 
+#define WHPX_CPUID_SIGNATURE 0x4000
+
 struct whpx_state {
 uint64_t mem_quota;
 WHV_PARTITION_HANDLE partition;
@@ -918,6 +920,7 @@ static int whpx_vcpu_run(CPUState *cpu)
 WHV_REGISTER_NAME reg_names[5];
 UINT32 reg_count = 5;
 UINT64 rip, rax, rcx, rdx, rbx;
+UINT32 signature[3] = {0};
 
 rip = vcpu->exit_ctx.VpContext.Rip +
   vcpu->exit_ctx.VpContext.InstructionLength;
@@ -932,6 +935,13 @@ static int whpx_vcpu_run(CPUState *cpu)
 rdx = vcpu->exit_ctx.CpuidAccess.DefaultResultRdx;
 rbx = vcpu->exit_ctx.CpuidAccess.DefaultResultRbx;
 break;
+case WHPX_CPUID_SIGNATURE:
+memcpy(signature, "WHPXWHPXWHPX", 12);
+rax = vcpu->exit_ctx.CpuidAccess.DefaultResultRax;
+rbx = signature[0];
+rcx = signature[1];
+rdx = signature[2];
+break;
 default:
 rax = vcpu->exit_ctx.CpuidAccess.DefaultResultRax;
 rcx = vcpu->exit_ctx.CpuidAccess.DefaultResultRcx;
@@ -1338,7 +1348,7 @@ static int whpx_accel_init(MachineState *ms)
 goto error;
 }
 
-UINT32 cpuidExitList[] = {1};
+UINT32 cpuidExitList[] = {1, WHPX_CPUID_SIGNATURE};
 hr = WHvSetPartitionProperty(whpx->partition,
  WHvPartitionPropertyCodeCpuidExitList,
  cpuidExitList,
-- 
2.13.2




Re: [Qemu-devel] [PATCH v4 4/9] hmp: disable monitor in preconfig state

2018-03-28 Thread Eduardo Habkost
On Wed, Mar 28, 2018 at 01:16:53PM +0200, Igor Mammedov wrote:
> On Fri, 23 Mar 2018 18:27:32 -0300
> Eduardo Habkost  wrote:
> 
> > On Mon, Mar 12, 2018 at 02:11:10PM +0100, Igor Mammedov wrote:
> > > Ban it for now, if someone would need it to work early,
> > > one would have to implement checks if HMP command is valid
> > > at preconfig state.
> > > 
> > > Signed-off-by: Igor Mammedov 
> > > ---
> > > v4:
> > >   * v3 was only printing error but not preventing command execution,
> > > Fix it by returning after printing error message.
> > > ("Dr. David Alan Gilbert" )
> > > ---
> > >  monitor.c | 5 +
> > >  1 file changed, 5 insertions(+)
> > > 
> > > diff --git a/monitor.c b/monitor.c
> > > index a4417f2..ea0ca57 100644
> > > --- a/monitor.c
> > > +++ b/monitor.c
> > > @@ -3104,6 +3104,11 @@ static void handle_hmp_command(Monitor *mon, const 
> > > char *cmdline)
> > >  
> > >  trace_handle_hmp_command(mon, cmdline);
> > >  
> > > +if (runstate_check(RUN_STATE_PRECONFIG)) {
> > > +monitor_printf(mon, "HMP not available in preconfig state\n");
> > > +return;  
> > 
> > Not even the "cont" command?  It would be useful for testing
> > -preconfig.
> As someone already said on the list it's very easy to test with
> QMP nowadays, just use qmp-shell for that.
> So if someone isn't willing to learn to use QMP, one can write
> HMP part with proper white-listing.
> 
> I can extend error message like this:
> 
> "HMP not available in preconfig state, use QMP instead\n"

Sounds good enough to me.

-- 
Eduardo



Re: [Qemu-devel] [PATCH v4 2/9] numa: split out NumaOptions parsing into parse_NumaOptions()

2018-03-28 Thread Eduardo Habkost
On Tue, Mar 27, 2018 at 03:08:27PM +0200, Igor Mammedov wrote:
> On Fri, 23 Mar 2018 17:42:18 -0300
> Eduardo Habkost  wrote:
> 
> > On Mon, Mar 12, 2018 at 02:11:08PM +0100, Igor Mammedov wrote:
> > > it will allow to reuse parse_NumaOptions() for parsing
> > > configuration commands received via QMP interface
> > > 
> > > Signed-off-by: Igor Mammedov 
> > > ---
> > >  include/sysemu/numa.h |  1 +
> > >  numa.c| 48 
> > > +---
> > >  2 files changed, 30 insertions(+), 19 deletions(-)
> > > 
> > > diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
> > > index 21713b7..7a0ae75 100644
> > > --- a/include/sysemu/numa.h
> > > +++ b/include/sysemu/numa.h
> > > @@ -22,6 +22,7 @@ struct NumaNodeMem {
> > >  };
> > >  
> > >  extern NodeInfo numa_info[MAX_NODES];
> > > +int parse_numa(void *opaque, QemuOpts *opts, Error **errp);
> > >  void parse_numa_opts(MachineState *ms);
> > >  void numa_complete_configuration(MachineState *ms);
> > >  void query_numa_node_mem(NumaNodeMem node_mem[]);
> > > diff --git a/numa.c b/numa.c
> > > index 126c649..2b1d292 100644
> > > --- a/numa.c
> > > +++ b/numa.c
> > > @@ -169,28 +169,11 @@ static void parse_numa_distance(NumaDistOptions 
> > > *dist, Error **errp)
> > >  have_numa_distance = true;
> > >  }
> > >  
> > > -static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
> > > +static
> > > +void parse_NumaOptions(MachineState *ms, NumaOptions *object, Error 
> > > **errp)  
> > 
> > I wonder if we should rename the parse_numa_{node,distance}()
> > functions to configure_numa_{node,distance}(), and this one
> > configure_numa().  These functions don't parse anything, anymore.
> I'd prefer to do it in another patch on top of this series
> (added it to my TODO list)

I agree with renaming parse_numa*() later, but the new function
you are creating can have a more reasonable name as it doesn't
parse anything.


> 
> 
> > >  {
> > > -NumaOptions *object = NULL;
> > > -MachineState *ms = opaque;
> > >  Error *err = NULL;
> > >  
> > > -{
> > > -Visitor *v = opts_visitor_new(opts);
> > > -visit_type_NumaOptions(v, NULL, , );
> > > -visit_free(v);
> > > -}
> > > -
> > > -if (err) {
> > > -goto end;
> > > -}
> > > -
> > > -/* Fix up legacy suffix-less format */
> > > -if ((object->type == NUMA_OPTIONS_TYPE_NODE) && 
> > > object->u.node.has_mem) {
> > > -const char *mem_str = qemu_opt_get(opts, "mem");
> > > -qemu_strtosz_MiB(mem_str, NULL, >u.node.mem);
> > > -}
> > > -
> > >  switch (object->type) {
> > >  case NUMA_OPTIONS_TYPE_NODE:
> > >  parse_numa_node(ms, >u.node, );
> > > @@ -224,6 +207,33 @@ static int parse_numa(void *opaque, QemuOpts *opts, 
> > > Error **errp)
> > >  }
> > >  
> > >  end:
> > > +if (err) {
> > > +error_propagate(errp, err);
> > > +}  
> > 
> > "if (err)" is not necessary here.  See
> > scripts/coccinelle/error_propagate_null.cocci.
> fixed

Thanks!

>  
> > > +}
> > > +
> > > +int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
> > > +{
> > > +NumaOptions *object = NULL;
> > > +MachineState *ms = MACHINE(opaque);
> > > +Error *err = NULL;
> > > +Visitor *v = opts_visitor_new(opts);
> > > +
> > > +visit_type_NumaOptions(v, NULL, , );
> > > +visit_free(v);
> > > +if (err) {
> > > +goto end;
> > > +}
> > > +
> > > +/* Fix up legacy suffix-less format */
> > > +if ((object->type == NUMA_OPTIONS_TYPE_NODE) && 
> > > object->u.node.has_mem) {
> > > +const char *mem_str = qemu_opt_get(opts, "mem");
> > > +qemu_strtosz_MiB(mem_str, NULL, >u.node.mem);
> > > +}
> > > +
> > > +parse_NumaOptions(ms, object, );
> > > +
> > > +end:
> > >  qapi_free_NumaOptions(object);
> > >  if (err) {
> > >  error_report_err(err);  
> > 
> > We can fix this one too while at it.
> error_report_err() doesn't check for NULL value,
> 'if(err)' is needed here

Sorry, my mistake.

-- 
Eduardo



Re: [Qemu-devel] [PATCH for-2.12 v2 2/2] i386/hyperv: error out if features requested but unsupported

2018-03-28 Thread Eduardo Habkost
On Wed, Mar 28, 2018 at 06:30:24PM +0300, Roman Kagan wrote:
> In order to guarantee compatibility on migration, QEMU should have
> complete control over the features it announces to the guest via CPUID.
> 
> However, for a number of Hyper-V-related cpu properties, if the
> corresponding feature is not supported by the underlying KVM, the
> property is silently ignored and the feature is not announced to the
> guest.
> 
> Refuse to start with an error instead.
> 
> Signed-off-by: Roman Kagan 

Something I didn't consider before:

Will this block migration before it even starts, or will it crash
the VM only after all migration data was sent to the destination?

I didn't test it, but kvm_arch_init_vcpu() seems to be too late
to block an invalid/unsupported configuration.

Maybe we can simply call hyperv_handle_properties() earlier,
inside x86_cpu_realizefn()?

(I know it's very late for this kind of intrusive change in
v2.12, but I still think it's a good idea to fix this as soon as
possible.)


> ---
> v1 -> v2:
>  - indicate what flag requested the feature that can't be enabled in the
>error message
>  - fix a typo in the error message for VP_RUNTIME
> 
>  target/i386/kvm.c | 32 
>  1 file changed, 28 insertions(+), 4 deletions(-)
> 
> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> index b35623ae24..113926aff2 100644
> --- a/target/i386/kvm.c
> +++ b/target/i386/kvm.c
> @@ -659,17 +659,41 @@ static int hyperv_handle_properties(CPUState *cs)
>  env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS;
>  env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE;
>  }
> -if (cpu->hyperv_crash && has_msr_hv_crash) {
> +if (cpu->hyperv_crash) {
> +if (!has_msr_hv_crash) {
> +fprintf(stderr, "Hyper-V crash MSRs "
> +"(requested by 'hv-crash' cpu flag) "
> +"are not supported by kernel\n");
> +return -ENOSYS;
> +}
>  env->features[FEAT_HYPERV_EDX] |= HV_GUEST_CRASH_MSR_AVAILABLE;
>  }
>  env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE;
> -if (cpu->hyperv_reset && has_msr_hv_reset) {
> +if (cpu->hyperv_reset) {
> +if (!has_msr_hv_reset) {
> +fprintf(stderr, "Hyper-V reset MSR "
> +"(requested by 'hv-reset' cpu flag) "
> +"is not supported by kernel\n");
> +return -ENOSYS;
> +}
>  env->features[FEAT_HYPERV_EAX] |= HV_RESET_AVAILABLE;
>  }
> -if (cpu->hyperv_vpindex && has_msr_hv_vpindex) {
> +if (cpu->hyperv_vpindex) {
> +if (!has_msr_hv_vpindex) {
> +fprintf(stderr, "Hyper-V VP_INDEX MSR "
> +"(requested by 'hv-vpindex' cpu flag) "
> +"is not supported by kernel\n");
> +return -ENOSYS;
> +}
>  env->features[FEAT_HYPERV_EAX] |= HV_VP_INDEX_AVAILABLE;
>  }
> -if (cpu->hyperv_runtime && has_msr_hv_runtime) {
> +if (cpu->hyperv_runtime) {
> +if (!has_msr_hv_runtime) {
> +fprintf(stderr, "Hyper-V VP_RUNTIME MSR "
> +"(requested by 'hv-runtime' cpu flag) "
> +"is not supported by kernel\n");
> +return -ENOSYS;
> +}
>  env->features[FEAT_HYPERV_EAX] |= HV_VP_RUNTIME_AVAILABLE;
>  }
>  if (cpu->hyperv_synic) {
> -- 
> 2.14.3
> 

-- 
Eduardo



Re: [Qemu-devel] [PATCH for-2.12 v2 1/2] i386/hyperv: add hv-frequencies cpu property

2018-03-28 Thread Eduardo Habkost
On Wed, Mar 28, 2018 at 06:30:23PM +0300, Roman Kagan wrote:
> In order to guarantee compatibility on migration, QEMU should have
> complete control over the features it announces to the guest via CPUID.
> 
> However, the availability of Hyper-V frequency MSRs
> (HV_X64_MSR_TSC_FREQUENCY and HV_X64_MSR_APIC_FREQUENCY) depends solely
> on the support for them in the underlying KVM.
> 
> Introduce "hv-frequencies" cpu property (off by default) which gives
> QEMU full control over whether these MSRs are announced.
> 
> While at this, drop the redundant check of the cpu tsc frequency, and
> decouple this feature from hv-time.
> 
> Signed-off-by: Roman Kagan 
> ---
> v1 -> v2:
>  - indicate what flag requested the feature that can't be enabled in the
>error message
> 
>  target/i386/cpu.h |  1 +
>  target/i386/cpu.c |  1 +
>  target/i386/kvm.c | 13 +
>  3 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 78db1b833a..1b219fafc4 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -1296,6 +1296,7 @@ struct X86CPU {
>  bool hyperv_runtime;
>  bool hyperv_synic;
>  bool hyperv_stimer;
> +bool hyperv_frequencies;
>  bool check_cpuid;
>  bool enforce_cpuid;
>  bool expose_kvm;
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 555ae79d29..1a6b082b6f 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -4761,6 +4761,7 @@ static Property x86_cpu_properties[] = {
>  DEFINE_PROP_BOOL("hv-runtime", X86CPU, hyperv_runtime, false),
>  DEFINE_PROP_BOOL("hv-synic", X86CPU, hyperv_synic, false),
>  DEFINE_PROP_BOOL("hv-stimer", X86CPU, hyperv_stimer, false),
> +DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false),
>  DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
>  DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
>  DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> index d23fff12f5..b35623ae24 100644
> --- a/target/i386/kvm.c
> +++ b/target/i386/kvm.c
> @@ -648,11 +648,16 @@ static int hyperv_handle_properties(CPUState *cs)
>  env->features[FEAT_HYPERV_EAX] |= HV_HYPERCALL_AVAILABLE;
>  env->features[FEAT_HYPERV_EAX] |= HV_TIME_REF_COUNT_AVAILABLE;
>  env->features[FEAT_HYPERV_EAX] |= HV_REFERENCE_TSC_AVAILABLE;
> -
> -if (has_msr_hv_frequencies && tsc_is_stable_and_known(env)) {
> -env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS;
> -env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE;
> +}
> +if (cpu->hyperv_frequencies) {
> +if (!has_msr_hv_frequencies) {
> +fprintf(stderr, "Hyper-V frequency MSRs "
> +"(requested by 'hv-frequencies' cpu flag) "
> +"are not supported by kernel\n");
> +return -ENOSYS;

I would like to move this to x86_cpu_filter_features(), but while
we don't refactor the Hyper-V CPUID code, this is good enough for
now.

Reviewed-by: Eduardo Habkost 

>  }
> +env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS;
> +env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE;
>  }
>  if (cpu->hyperv_crash && has_msr_hv_crash) {
>  env->features[FEAT_HYPERV_EDX] |= HV_GUEST_CRASH_MSR_AVAILABLE;
> -- 
> 2.14.3
> 

-- 
Eduardo



Re: [Qemu-devel] [PATCH] i386/kvm: add support for KVM_CAP_X86_DISABLE_EXITS

2018-03-28 Thread Eduardo Habkost
On Wed, Mar 28, 2018 at 03:06:23AM +0300, Michael S. Tsirkin wrote:
> On Tue, Mar 27, 2018 at 06:36:46PM -0300, Eduardo Habkost wrote:
> > On Tue, Mar 27, 2018 at 10:42:56PM +0300, Michael S. Tsirkin wrote:
> > > On Fri, Mar 16, 2018 at 07:36:42AM -0700, Wanpeng Li wrote:
> > > > From: Wanpeng Li 
> > > > 
> > > > This patch adds support for KVM_CAP_X86_DISABLE_EXITS. Provides 
> > > > userspace with 
> > > > per-VM capability(KVM_CAP_X86_DISABLE_EXITS) to not intercept 
> > > > MWAIT/HLT/PAUSE 
> > > > in order to improve latency in some workloads.
> > > > 
> > [...]
> > > > diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> > > > index d23fff1..95ed9eb 100644
> > > > --- a/target/i386/kvm.c
> > > > +++ b/target/i386/kvm.c
> > > > @@ -999,6 +999,18 @@ int kvm_arch_init_vcpu(CPUState *cs)
> > > >  }
> > > >  }
> > > >  
> > > > +if (env->features[FEAT_KVM_HINTS] & KVM_HINTS_DEDICATED) {
> > > > +int disable_exits = kvm_check_extension(cs->kvm_state, 
> > > > KVM_CAP_X86_DISABLE_EXITS);
> > > > +if (disable_exits) {
> > > > +disable_exits &= (KVM_X86_DISABLE_EXITS_MWAIT |
> > > > +  KVM_X86_DISABLE_EXITS_HLT |
> > > > +  KVM_X86_DISABLE_EXITS_PAUSE);
> > > > +}
> > > > +if (kvm_vm_enable_cap(cs->kvm_state, 
> > > > KVM_CAP_X86_DISABLE_EXITS, 0, disable_exits)) {
> > > > +error_report("kvm: DISABLE EXITS not supported");
> > > > +}
> > > > +}
> > > > +
> > > >  qemu_add_vm_change_state_handler(cpu_update_state, env);
> > > >  
> > > >  c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0);
> > > 
> > > Why not a bit per capability?
> > > I can see how someone might want to disable mwait exits
> > > but not the rest of them.
> > 
> > kvm-hint-dedicated=on should be used only if the physical CPU is
> > dedicated to the VCPU task.  Are there any advantages of getting
> > vmexits for HLT and PAUSE if no other task is going to use the
> > CPU?
> 
> No but there are advantages to using mwait even without a dedicated host
> CPU (VCPUs can wake up each other without exiting to hypervisor).

Are there any downsides?  What needs to be taken into account
when deciding if mwait exits can be safely disabled?


> 
> Which is my point - there should be a separate flag to disable mwait
> exiting only.

Adding new command-line option is possible, but not necessary for
the dedicated-CPU use case.  This means this patch is already
useful without adding new flags.

-- 
Eduardo



Re: [Qemu-devel] [PATCH] blockjob: use qapi enum helpers

2018-03-28 Thread Jeff Cody
On Tue, Mar 27, 2018 at 05:30:11PM +0200, Marc-André Lureau wrote:
> The QAPI generator provides #define helpers for looking up enum strings.
> 
> Signed-off-by: Marc-André Lureau 
> ---
>  blockjob.c | 14 +-
>  1 file changed, 5 insertions(+), 9 deletions(-)
> 
> diff --git a/blockjob.c b/blockjob.c
> index ef3ed69ff1..11c9ce124d 100644
> --- a/blockjob.c
> +++ b/blockjob.c
> @@ -75,10 +75,8 @@ static void block_job_state_transition(BlockJob *job, 
> BlockJobStatus s1)
>  assert(s1 >= 0 && s1 <= BLOCK_JOB_STATUS__MAX);
>  trace_block_job_state_transition(job, job->ret, BlockJobSTT[s0][s1] ?
>   "allowed" : "disallowed",
> - qapi_enum_lookup(&BlockJobStatus_lookup,
> -  s0),
> - qapi_enum_lookup(&BlockJobStatus_lookup,
> -  s1));
> + BlockJobStatus_str(s0),
> + BlockJobStatus_str(s1));
>  assert(BlockJobSTT[s0][s1]);
>  job->status = s1;
>  }
> @@ -86,17 +84,15 @@ static void block_job_state_transition(BlockJob *job, 
> BlockJobStatus s1)
>  static int block_job_apply_verb(BlockJob *job, BlockJobVerb bv, Error **errp)
>  {
>  assert(bv >= 0 && bv <= BLOCK_JOB_VERB__MAX);
> -trace_block_job_apply_verb(job, qapi_enum_lookup(&BlockJobStatus_lookup,
> - job->status),
> -   qapi_enum_lookup(&BlockJobVerb_lookup, bv),
> +trace_block_job_apply_verb(job, BlockJobStatus_str(job->status),
> +   BlockJobVerb_str(bv),
> BlockJobVerbTable[bv][job->status] ?
> "allowed" : "prohibited");
>  if (BlockJobVerbTable[bv][job->status]) {
>  return 0;
>  }
>  error_setg(errp, "Job '%s' in state '%s' cannot accept command verb 
> '%s'",
> -   job->id, qapi_enum_lookup(&BlockJobStatus_lookup, 
> job->status),
> -   qapi_enum_lookup(&BlockJobVerb_lookup, bv));
> +   job->id, BlockJobStatus_str(job->status), 
> BlockJobVerb_str(bv));
>  return -EPERM;
>  }
>  
> -- 
> 2.17.0.rc1.1.g4c4f2b46a3
> 

Thanks,

Looks like a bug fix to me.

Applied to my block branch:

git://github.com/codyprime/qemu-kvm-jtc block

-Jeff



Re: [Qemu-devel] [PATCH v2 1/1] blockjob: leak fix, remove from txn when failing early

2018-03-28 Thread Jeff Cody
On Wed, Mar 28, 2018 at 04:28:05PM +0200, Marc-André Lureau wrote:
> On Wed, Mar 28, 2018 at 4:09 PM, Jeff Cody  wrote:
> > From: Marc-André Lureau 
> >
> > This fixes leaks found by ASAN such as:
> >   GTESTER tests/test-blockjob
> > =
> > ==31442==ERROR: LeakSanitizer: detected memory leaks
> >
> > Direct leak of 24 byte(s) in 1 object(s) allocated from:
> > #0 0x7f88483cba38 in __interceptor_calloc (/lib64/libasan.so.4+0xdea38)
> > #1 0x7f8845e1bd77 in g_malloc0 ../glib/gmem.c:129
> > #2 0x7f8845e1c04b in g_malloc0_n ../glib/gmem.c:360
> > #3 0x5584d2732498 in block_job_txn_new 
> > /home/elmarco/src/qemu/blockjob.c:172
> > #4 0x5584d2739b28 in block_job_create 
> > /home/elmarco/src/qemu/blockjob.c:973
> > #5 0x5584d270ae31 in mk_job 
> > /home/elmarco/src/qemu/tests/test-blockjob.c:34
> > #6 0x5584d270b1c1 in do_test_id 
> > /home/elmarco/src/qemu/tests/test-blockjob.c:57
> > #7 0x5584d270b65c in test_job_ids 
> > /home/elmarco/src/qemu/tests/test-blockjob.c:118
> > #8 0x7f8845e40b69 in test_case_run ../glib/gtestutils.c:2255
> > #9 0x7f8845e40f29 in g_test_run_suite_internal ../glib/gtestutils.c:2339
> > #10 0x7f8845e40fd2 in g_test_run_suite_internal 
> > ../glib/gtestutils.c:2351
> > #11 0x7f8845e411e9 in g_test_run_suite ../glib/gtestutils.c:2426
> > #12 0x7f8845e3fe72 in g_test_run ../glib/gtestutils.c:1692
> > #13 0x5584d270d6e2 in main 
> > /home/elmarco/src/qemu/tests/test-blockjob.c:377
> > #14 0x7f8843641f29 in __libc_start_main (/lib64/libc.so.6+0x20f29)
> >
> > Add an assert to make sure that the job doesn't have associated txn before 
> > free().
> >
> > [Jeff Cody: N.B., used updated patch provided by John Snow]
> 
> Looks good to me, so :)
> Signed-off-by: Marc-André Lureau 
> 
> thanks

Thanks,

Applied to my block branch:

git://github.com/codyprime/qemu-kvm-jtc block

-Jeff

> 
> >
> > ---
> >  blockjob.c | 14 --
> >  1 file changed, 12 insertions(+), 2 deletions(-)
> >
> > diff --git a/blockjob.c b/blockjob.c
> > index ef3ed69ff1..c510a9fde5 100644
> > --- a/blockjob.c
> > +++ b/blockjob.c
> > @@ -204,6 +204,15 @@ void block_job_txn_add_job(BlockJobTxn *txn, BlockJob 
> > *job)
> >  block_job_txn_ref(txn);
> >  }
> >
> > +static void block_job_txn_del_job(BlockJob *job)
> > +{
> > +if (job->txn) {
> > +QLIST_REMOVE(job, txn_list);
> > +block_job_txn_unref(job->txn);
> > +job->txn = NULL;
> > +}
> > +}
> > +
> >  static void block_job_pause(BlockJob *job)
> >  {
> >  job->pause_count++;
> > @@ -232,6 +241,7 @@ void block_job_unref(BlockJob *job)
> >  {
> >  if (--job->refcnt == 0) {
> >  assert(job->status == BLOCK_JOB_STATUS_NULL);
> > +assert(!job->txn);
> >  BlockDriverState *bs = blk_bs(job->blk);
> >  QLIST_REMOVE(job, job_list);
> >  bs->job = NULL;
> > @@ -392,6 +402,7 @@ static void block_job_decommission(BlockJob *job)
> >  job->busy = false;
> >  job->paused = false;
> >  job->deferred_to_main_loop = true;
> > +block_job_txn_del_job(job);
> >  block_job_state_transition(job, BLOCK_JOB_STATUS_NULL);
> >  block_job_unref(job);
> >  }
> > @@ -481,8 +492,7 @@ static int block_job_finalize_single(BlockJob *job)
> >  }
> >  }
> >
> > -QLIST_REMOVE(job, txn_list);
> > -block_job_txn_unref(job->txn);
> > +block_job_txn_del_job(job);
> >  block_job_conclude(job);
> >  return 0;
> >  }
> > --
> > 2.13.6
> >





Re: [Qemu-devel] [PATCH for-2.12 0/2] RISC-V: Mark FP status dirty

2018-03-28 Thread Michael Clark
On Tue, Mar 27, 2018 at 7:22 PM, Richard Henderson <
richard.hender...@linaro.org> wrote:

> Since it was my patch that broke FP state tracking in the
> first place, I feel obligated to fix it again.
>
> Mark mstatus[fs] as dirty whenever we write to the file.
> This can be optimized by only doing so once within a TB
> which initially began with a clean file.
>
> I have not yet put together an environment that can test
> this, so I'll need someone else to give it a go.
>

I have tested it with the simple test case running SMP Linux and it appears
okay (Note it must be compiled with -O2):

 http://oirase.annexia.org/tmp/sched.c

It is clearly broken without your two patches.


Re: [Qemu-devel] [PATCH for-2.12 v4] iotests: Test abnormally large size in compressed cluster descriptor

2018-03-28 Thread Eric Blake

On 03/28/2018 12:34 PM, Max Reitz wrote:

On 2018-03-22 13:41, Alberto Garcia wrote:

L2 entries for compressed clusters have a field that indicates the
number of sectors used to store the data in the image.

That's however not the size of the compressed data itself, just the
number of sectors where that data is located. The actual data size is
usually not a multiple of the sector size, and therefore cannot be
represented with this field.




+++ b/tests/qemu-iotests/122


Not sure if 122 is the right file for this...

Or, let me rephrase, it does look to me like it is not the right file.
But on the other hand, I don't see a more suitable file.


Short of cloning 122 as the starting point and creating a new test file.




@@ -130,6 +130,51 @@ $QEMU_IO -c "read -P 01024k 1022k" "$TEST_IMG" 2>&1 | 
_filter_qemu_io | _fil
  
  
  echo

+echo "=== Corrupted size field in compressed cluster descriptor ==="
+echo
+# Create an empty image, fill half of it with data and compress it.
+# The L2 entries of the two compressed clusters are located at
+# 0x800000 and 0x800008, their original values are 0x4008000000a00000
+# and 0x4008000000a00802 (5 sectors for compressed data each).
+TEST_IMG="$TEST_IMG".1 _make_test_img 8M
+$QEMU_IO -c "write -P 0x11 0 4M" "$TEST_IMG".1 2>&1 | _filter_qemu_io | 
_filter_testdir
+$QEMU_IMG convert -c -O qcow2 -o cluster_size=2M "$TEST_IMG".1 "$TEST_IMG"


Why not just use "write -c" and drop the convert?  (You'd have to use
two writes, though, one for each cluster.)


'write -c' is newer code; it may work, but it may also cause offsets to 
live elsewhere for knock-on effects later in the test. (It used to be 
compression was only possible during convert)




+
+# Here the image is too small so we're asking QEMU to read beyond the
+# end of the image.
+$QEMU_IO -c "read  -P 0x11  0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | 
_filter_testdir
+# But if we grow the image we won't be reading beyond its end anymore.
+$QEMU_IO -c "write -P 0x22 4M 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | 
_filter_testdir
+$QEMU_IO -c "read  -P 0x11  0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | 
_filter_testdir


Both reads result in exactly the same output, though, so it doesn't seem
like qemu cares.

(This is the reason I'm not merging this patch now, because that looks a
bit fishy.)


Hmm, you have an interesting point - should the read fail (you asked me 
to read more clusters than I have available) or succeed (since you asked 
me to read beyond EOF, I filled the tail of the buffer with all zeroes, 
then tried decompressing, and since decompression worked, it obviously 
didn't need the bytes that I filled in as zero).  It's a little nicer to 
fail (the image is fishy for requesting more clusters than are present, 
even if what IS present is sufficient to reconstruct the data).





+
+# The refcount data is however wrong because due to the increased size
+# of the compressed data it now reaches the following host clusters.
+# This can be repaired by qemu-img check.


The OFLAG_COPIED repair looks a bit interesting, but, er, well.

Max

(Since a compressed cluster does not correspond 1:1 to a host cluster,
you cannot say that a compressed cluster has a refcount -- only host
clusters are refcounted.  So what is it supposed to mean that a
compressed cluster has a refcount of 1?


A compressed cluster may affect the refcount of multiple clusters: the 
cluster that contains the initial offset, and the cluster that contains 
any of the nb_sectors that did not fit in the first cluster.  So, if I 
have a 4k-cluster image (where each character is a sector), and where 
the compressed clusters are nicely sector-aligned:


|1-------|2-------|3-------|
|AAAAAABB|BBBBCCCC|CC------|

Here, the L2 entry for A, B, and C each list nb_sectors of 5, as it 
takes 6 sectors to list the entire image, but nb_sectors does not 
include the sector that includes the original offset.  The refcount for 
cluster 1 is 2 (the full contents of compressed A and the first half of 
compressed B); for cluster 2 is 2 (the second half of compressed B and 
the first half of compressed C); and for cluster 3 is 1 (the second half 
of compressed C).


But what this patch is dealing with is when nb_sectors is larger than 
required.  With 4k clusters, qemu will never populate nb_sectors more 
than 8 (if the output is not nicely aligned, and 4096 bytes compresses 
down to only 4095, we can end up with 1 byte in the first sector, then 7 
complete sectors, then 510 bytes in a final sector, for 8 sectors beyond 
the initial offset).  But the qcow2 image is still valid even if the L2 
entry claims nb_sectors of 15; if that happens, then a compressed 
cluster can now affect the refcount of 3 clusters rather than the usual 
1 or 2.




I'd argue from a point of usefulness.  In theory, you could modify
compressed clusters in-place, and then you'd need the information
whether you are allowed to.  But that doesn't really depend on whether
the host clusters 

Re: [Qemu-devel] [PATCH] WHPX fixes an issue with CPUID 1 not returning CPUID_EXT_HYPERVISOR

2018-03-28 Thread Eduardo Habkost
On Mon, Mar 26, 2018 at 10:06:58AM -0700, Justin Terry (VM) wrote:
> Implements the CPUID trap for CPUID 1 to include the
> CPUID_EXT_HYPERVISOR flag in the ECX results. This was preventing some
> older Linux kernels from booting when trying to access MSRs that don't
> make sense when virtualized.
> 
> Signed-off-by: Justin Terry (VM) 
> ---
>  target/i386/whpx-all.c | 79 
> +-
>  1 file changed, 78 insertions(+), 1 deletion(-)
> 
> diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c
> index bf33d320bf..58435178a4 100644
> --- a/target/i386/whpx-all.c
> +++ b/target/i386/whpx-all.c
> @@ -911,12 +911,62 @@ static int whpx_vcpu_run(CPUState *cpu)
>  ret = 1;
>  break;
>  
> +case WHvRunVpExitReasonX64Cpuid: {
> +WHV_REGISTER_VALUE reg_values[5] = {0};
> +WHV_REGISTER_NAME reg_names[5];
> +UINT32 reg_count = 5;
> +UINT64 rip, rax, rcx, rdx, rbx;
> +
> +rip = vcpu->exit_ctx.VpContext.Rip +
> +  vcpu->exit_ctx.VpContext.InstructionLength;
> +switch (vcpu->exit_ctx.CpuidAccess.Rax) {
> +case 1:
> +rax = vcpu->exit_ctx.CpuidAccess.DefaultResultRax;
> +/* Advertise that we are running on a hypervisor */
> +rcx =
> +vcpu->exit_ctx.CpuidAccess.DefaultResultRcx |
> +CPUID_EXT_HYPERVISOR;
> +
> +rdx = vcpu->exit_ctx.CpuidAccess.DefaultResultRdx;
> +rbx = vcpu->exit_ctx.CpuidAccess.DefaultResultRbx;
> +break;
> +default:
> +rax = vcpu->exit_ctx.CpuidAccess.DefaultResultRax;
> +rcx = vcpu->exit_ctx.CpuidAccess.DefaultResultRcx;
> +rdx = vcpu->exit_ctx.CpuidAccess.DefaultResultRdx;
> +rbx = vcpu->exit_ctx.CpuidAccess.DefaultResultRbx;

Interesting, so the WHPX API already tries to provide default
values for the CPUID leaves.  Would it make sense to try and use
the values returned by cpu_x86_cpuid() in the future?

Is there a way to get the default CPUID results from the WHPX API
without calling WHvRunVirtualProcessor(), so QEMU can be aware of
what exactly the guest is seeing on CPUID?


> +}
> +
> +reg_names[0] = WHvX64RegisterRip;
> +reg_names[1] = WHvX64RegisterRax;
> +reg_names[2] = WHvX64RegisterRcx;
> +reg_names[3] = WHvX64RegisterRdx;
> +reg_names[4] = WHvX64RegisterRbx;
> +
> +reg_values[0].Reg64 = rip;
> +reg_values[1].Reg64 = rax;
> +reg_values[2].Reg64 = rcx;
> +reg_values[3].Reg64 = rdx;
> +reg_values[4].Reg64 = rbx;
> +
> +hr = WHvSetVirtualProcessorRegisters(whpx->partition,
> + cpu->cpu_index,
> + reg_names,
> + reg_count,
> + reg_values);
> +
> +if (FAILED(hr)) {
> +error_report("WHPX: Failed to set CpuidAccess state 
> registers,"
> + " hr=%08lx", hr);
> +}
> +ret = 0;
> +break;
> +}
>  case WHvRunVpExitReasonNone:
>  case WHvRunVpExitReasonUnrecoverableException:
>  case WHvRunVpExitReasonInvalidVpRegisterValue:
>  case WHvRunVpExitReasonUnsupportedFeature:
>  case WHvRunVpExitReasonX64MsrAccess:
> -case WHvRunVpExitReasonX64Cpuid:
>  case WHvRunVpExitReasonException:
>  default:
>  error_report("WHPX: Unexpected VP exit code %d",
> @@ -1272,6 +1322,33 @@ static int whpx_accel_init(MachineState *ms)
>  goto error;
>  }
>  
> +memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
> +prop.ExtendedVmExits.X64CpuidExit = 1;
> +hr = WHvSetPartitionProperty(whpx->partition,
> + WHvPartitionPropertyCodeExtendedVmExits,
> + &prop,
> + sizeof(WHV_PARTITION_PROPERTY));
> +
> +if (FAILED(hr)) {
> +error_report("WHPX: Failed to enable partition extended X64CpuidExit"
> + " hr=%08lx", hr);
> +ret = -EINVAL;
> +goto error;
> +}
> +
> +UINT32 cpuidExitList[] = {1};
> +hr = WHvSetPartitionProperty(whpx->partition,
> + WHvPartitionPropertyCodeCpuidExitList,
> + cpuidExitList,
> + RTL_NUMBER_OF(cpuidExitList) * 
> sizeof(UINT32));
> +
> +if (FAILED(hr)) {
> +error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
> + hr);
> +ret = -EINVAL;
> +goto error;
> +}
> +
>  hr = 

Re: [Qemu-devel] [PATCH 2/2] target/riscv: Mark MSTATUS_FS dirty

2018-03-28 Thread Michael Clark
On Wed, Mar 28, 2018 at 10:36 AM, Michael Clark  wrote:

>
>
> On Tue, Mar 27, 2018 at 7:22 PM, Richard Henderson <
> richard.hender...@linaro.org> wrote:
>
>> Writes to the FP register file mark the register file as dirty.
>>
>> Signed-off-by: Richard Henderson 
>> ---
>>  target/riscv/op_helper.c | 25 +
>>  target/riscv/translate.c | 40 +++-
>>  2 files changed, 56 insertions(+), 9 deletions(-)
>>
>> diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
>> index e34715df4e..74eeef0be8 100644
>> --- a/target/riscv/op_helper.c
>> +++ b/target/riscv/op_helper.c
>> @@ -72,11 +72,20 @@ void helper_raise_exception(CPURISCVState *env,
>> uint32_t exception)
>>  do_raise_exception_err(env, exception, 0);
>>  }
>>
>> -static void validate_mstatus_fs(CPURISCVState *env, uintptr_t ra)
>> +static void validate_mstatus_fs(CPURISCVState *env, uintptr_t ra, bool
>> write)
>>  {
>>  #ifndef CONFIG_USER_ONLY
>> -if (!(env->mstatus & MSTATUS_FS)) {
>> +switch (get_field(env->mstatus, MSTATUS_FS)) {
>> +case 0: /* disabled */
>>  do_raise_exception_err(env, RISCV_EXCP_ILLEGAL_INST, ra);
>> +g_assert_not_reached();
>> +case 1: /* initial */
>> +case 2: /* clean */
>> +if (write) {
>> +/* Mark fp status as dirty.  */
>> +env->mstatus = MSTATUS_FS;
>> +}
>> +break;
>>  }
>>  #endif
>>  }
>> @@ -96,15 +105,15 @@ void csr_write_helper(CPURISCVState *env,
>> target_ulong val_to_write,
>>
>>  switch (csrno) {
>>  case CSR_FFLAGS:
>> -validate_mstatus_fs(env, GETPC());
>> +validate_mstatus_fs(env, GETPC(), true);
>>  cpu_riscv_set_fflags(env, val_to_write & (FSR_AEXC >>
>> FSR_AEXC_SHIFT));
>>  break;
>>  case CSR_FRM:
>> -validate_mstatus_fs(env, GETPC());
>> +validate_mstatus_fs(env, GETPC(), true);
>>  env->frm = val_to_write & (FSR_RD >> FSR_RD_SHIFT);
>>  break;
>>  case CSR_FCSR:
>> -validate_mstatus_fs(env, GETPC());
>> +validate_mstatus_fs(env, GETPC(), true);
>>  env->frm = (val_to_write & FSR_RD) >> FSR_RD_SHIFT;
>>  cpu_riscv_set_fflags(env, (val_to_write & FSR_AEXC) >>
>> FSR_AEXC_SHIFT);
>>  break;
>> @@ -379,13 +388,13 @@ target_ulong csr_read_helper(CPURISCVState *env,
>> target_ulong csrno)
>>
>>  switch (csrno) {
>>  case CSR_FFLAGS:
>> -validate_mstatus_fs(env, GETPC());
>> +validate_mstatus_fs(env, GETPC(), false);
>>  return cpu_riscv_get_fflags(env);
>>  case CSR_FRM:
>> -validate_mstatus_fs(env, GETPC());
>> +validate_mstatus_fs(env, GETPC(), false);
>>  return env->frm;
>>  case CSR_FCSR:
>> -validate_mstatus_fs(env, GETPC());
>> +validate_mstatus_fs(env, GETPC(), false);
>>  return (cpu_riscv_get_fflags(env) << FSR_AEXC_SHIFT)
>>  | (env->frm << FSR_RD_SHIFT);
>>  /* rdtime/rdtimeh is trapped and emulated by bbl in system mode */
>> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
>> index a30724aa90..08fc42a679 100644
>> --- a/target/riscv/translate.c
>> +++ b/target/riscv/translate.c
>> @@ -660,6 +660,31 @@ static void gen_store(DisasContext *ctx, uint32_t
>> opc, int rs1, int rs2,
>>  tcg_temp_free(dat);
>>  }
>>
>> +#ifndef CONFIG_USER_ONLY
>> +/* The states of mstatus_fs are:
>> + * 0 = disabled, 1 = initial, 2 = clean, 3 = dirty
>> + * We will have already diagnosed disabled state,
>> + * and need to turn initial/clean into dirty.
>> + */
>> +static void mark_fs_dirty(DisasContext *ctx)
>> +{
>> +TCGv tmp;
>> +if (ctx->mstatus_fs == MSTATUS_FS) {
>> +return;
>> +}
>> +/* Remember the state change for the rest of the TB.  */
>> +ctx->mstatus_fs = MSTATUS_FS;
>> +
>> +tmp = tcg_temp_new();
>> +tcg_gen_ld_tl(tmp, cpu_env, offsetof(CPURISCVState, mstatus));
>> +tcg_gen_ori_tl(tmp, tmp, MSTATUS_FS);
>> +tcg_gen_st_tl(tmp, cpu_env, offsetof(CPURISCVState, mstatus));
>> +tcg_temp_free(tmp);
>> +}
>> +#else
>> +static inline void mark_fs_dirty(DisasContext *ctx) { }
>> +#endif
>> +
>>  static void gen_fp_load(DisasContext *ctx, uint32_t opc, int rd,
>>  int rs1, target_long imm)
>>  {
>> @@ -688,6 +713,8 @@ static void gen_fp_load(DisasContext *ctx, uint32_t
>> opc, int rd,
>>  break;
>>  }
>>  tcg_temp_free(t0);
>> +
>> +mark_fs_dirty(ctx);
>>  }
>>
>
> Don't we want the mark_fs_dirty(ctx) to be at the end of gen_fp_store
> instead of gen_fp_load?
>

Sorry I was thinking of storing to the fp register file vs storing to
memory. The code is of course correct.


>  static void gen_fp_store(DisasContext *ctx, uint32_t opc, int rs1,
>> @@ -985,6 +1012,7 @@ static void gen_fp_arith(DisasContext *ctx, uint32_t
>> opc, int rd,
>>   int rs1, int rs2, int rm)
>>  {
>>   

Re: [Qemu-devel] [PATCH 2/2] target/riscv: Mark MSTATUS_FS dirty

2018-03-28 Thread Michael Clark
On Tue, Mar 27, 2018 at 7:22 PM, Richard Henderson <
richard.hender...@linaro.org> wrote:

> Writes to the FP register file mark the register file as dirty.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/riscv/op_helper.c | 25 +
>  target/riscv/translate.c | 40 +++-
>  2 files changed, 56 insertions(+), 9 deletions(-)
>
> diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
> index e34715df4e..74eeef0be8 100644
> --- a/target/riscv/op_helper.c
> +++ b/target/riscv/op_helper.c
> @@ -72,11 +72,20 @@ void helper_raise_exception(CPURISCVState *env,
> uint32_t exception)
>  do_raise_exception_err(env, exception, 0);
>  }
>
> -static void validate_mstatus_fs(CPURISCVState *env, uintptr_t ra)
> +static void validate_mstatus_fs(CPURISCVState *env, uintptr_t ra, bool
> write)
>  {
>  #ifndef CONFIG_USER_ONLY
> -if (!(env->mstatus & MSTATUS_FS)) {
> +switch (get_field(env->mstatus, MSTATUS_FS)) {
> +case 0: /* disabled */
>  do_raise_exception_err(env, RISCV_EXCP_ILLEGAL_INST, ra);
> +g_assert_not_reached();
> +case 1: /* initial */
> +case 2: /* clean */
> +if (write) {
> +/* Mark fp status as dirty.  */
> +env->mstatus = MSTATUS_FS;
> +}
> +break;
>  }
>  #endif
>  }
> @@ -96,15 +105,15 @@ void csr_write_helper(CPURISCVState *env,
> target_ulong val_to_write,
>
>  switch (csrno) {
>  case CSR_FFLAGS:
> -validate_mstatus_fs(env, GETPC());
> +validate_mstatus_fs(env, GETPC(), true);
>  cpu_riscv_set_fflags(env, val_to_write & (FSR_AEXC >>
> FSR_AEXC_SHIFT));
>  break;
>  case CSR_FRM:
> -validate_mstatus_fs(env, GETPC());
> +validate_mstatus_fs(env, GETPC(), true);
>  env->frm = val_to_write & (FSR_RD >> FSR_RD_SHIFT);
>  break;
>  case CSR_FCSR:
> -validate_mstatus_fs(env, GETPC());
> +validate_mstatus_fs(env, GETPC(), true);
>  env->frm = (val_to_write & FSR_RD) >> FSR_RD_SHIFT;
>  cpu_riscv_set_fflags(env, (val_to_write & FSR_AEXC) >>
> FSR_AEXC_SHIFT);
>  break;
> @@ -379,13 +388,13 @@ target_ulong csr_read_helper(CPURISCVState *env,
> target_ulong csrno)
>
>  switch (csrno) {
>  case CSR_FFLAGS:
> -validate_mstatus_fs(env, GETPC());
> +validate_mstatus_fs(env, GETPC(), false);
>  return cpu_riscv_get_fflags(env);
>  case CSR_FRM:
> -validate_mstatus_fs(env, GETPC());
> +validate_mstatus_fs(env, GETPC(), false);
>  return env->frm;
>  case CSR_FCSR:
> -validate_mstatus_fs(env, GETPC());
> +validate_mstatus_fs(env, GETPC(), false);
>  return (cpu_riscv_get_fflags(env) << FSR_AEXC_SHIFT)
>  | (env->frm << FSR_RD_SHIFT);
>  /* rdtime/rdtimeh is trapped and emulated by bbl in system mode */
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index a30724aa90..08fc42a679 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -660,6 +660,31 @@ static void gen_store(DisasContext *ctx, uint32_t
> opc, int rs1, int rs2,
>  tcg_temp_free(dat);
>  }
>
> +#ifndef CONFIG_USER_ONLY
> +/* The states of mstatus_fs are:
> + * 0 = disabled, 1 = initial, 2 = clean, 3 = dirty
> + * We will have already diagnosed disabled state,
> + * and need to turn initial/clean into dirty.
> + */
> +static void mark_fs_dirty(DisasContext *ctx)
> +{
> +TCGv tmp;
> +if (ctx->mstatus_fs == MSTATUS_FS) {
> +return;
> +}
> +/* Remember the state change for the rest of the TB.  */
> +ctx->mstatus_fs = MSTATUS_FS;
> +
> +tmp = tcg_temp_new();
> +tcg_gen_ld_tl(tmp, cpu_env, offsetof(CPURISCVState, mstatus));
> +tcg_gen_ori_tl(tmp, tmp, MSTATUS_FS);
> +tcg_gen_st_tl(tmp, cpu_env, offsetof(CPURISCVState, mstatus));
> +tcg_temp_free(tmp);
> +}
> +#else
> +static inline void mark_fs_dirty(DisasContext *ctx) { }
> +#endif
> +
>  static void gen_fp_load(DisasContext *ctx, uint32_t opc, int rd,
>  int rs1, target_long imm)
>  {
> @@ -688,6 +713,8 @@ static void gen_fp_load(DisasContext *ctx, uint32_t
> opc, int rd,
>  break;
>  }
>  tcg_temp_free(t0);
> +
> +mark_fs_dirty(ctx);
>  }
>

Don't we want the mark_fs_dirty(ctx) to be at the end of gen_fp_store
instead of gen_fp_load?



>  static void gen_fp_store(DisasContext *ctx, uint32_t opc, int rs1,
> @@ -985,6 +1012,7 @@ static void gen_fp_arith(DisasContext *ctx, uint32_t
> opc, int rd,
>   int rs1, int rs2, int rm)
>  {
>  TCGv t0 = NULL;
> +bool fp_output = true;
>
>  if (ctx->mstatus_fs == 0) {
>  goto do_illegal;
> @@ -1047,6 +1075,7 @@ static void gen_fp_arith(DisasContext *ctx, uint32_t
> opc, int rd,
>  }
>  gen_set_gpr(rd, t0);
>  tcg_temp_free(t0);
> +fp_output = false;
>  

Re: [Qemu-devel] [PATCH for-2.12] migration: Don't activate block devices if using -S

2018-03-28 Thread Eric Blake

On 03/28/2018 12:02 PM, Dr. David Alan Gilbert (git) wrote:

From: "Dr. David Alan Gilbert" 

Activating the block devices causes the locks to be taken on
the backing file.  If we're running with -S and the destination libvirt
hasn't started the destination with 'cont', it's expecting the locks are
still untaken.

Don't activate the block devices if we're not going to autostart the VM;
'cont' already will do that anyway.

bz: https://bugzilla.redhat.com/show_bug.cgi?id=1560854
Signed-off-by: Dr. David Alan Gilbert 
---
  migration/migration.c | 22 +++---
  1 file changed, 15 insertions(+), 7 deletions(-)


Sounds like 2.12 material.

Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



Re: [Qemu-devel] [PATCH] file-posix: Support fallocate for block device

2018-03-28 Thread Eric Blake

On 03/27/2018 09:37 PM, zhenwei.pi wrote:

since linux 4.9, block device supports fallocate. kernel issues
block device zereout request and invalidates page cache. So
ioctl(fd, FALLOC_FL_ZERO_RANGE...) is safer than ioctl(fd,


did you mean fallocate() in the first half of the sentence?


BLKZEROOUT...). try to call do_fallocate, if failing, fallback.

use new field "has_fallocate_zero_range" with default value as
true. if do_fallocate returns -ENOTSUP, it will be set false.

Signed-off-by: zhenwei.pi 
---
  block/file-posix.c | 27 +--
  1 file changed, 17 insertions(+), 10 deletions(-)



This feels more like a feature for 2.13, than a bug fix that would fit 
during freeze for 2.12.



diff --git a/block/file-posix.c b/block/file-posix.c
index d7fb772..842e940 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -159,8 +159,9 @@ typedef struct BDRVRawState {
  bool discard_zeroes:1;
  bool use_linux_aio:1;
  bool page_cache_inconsistent:1;
-bool has_fallocate;
-bool needs_alignment;
+bool has_fallocate:1;
+bool has_fallocate_zero_range:1;
+bool needs_alignment:1;
  
  PRManager *pr_mgr;

  } BDRVRawState;
@@ -549,6 +550,7 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,
  
  s->has_discard = true;

  s->has_write_zeroes = true;
+s->has_fallocate_zero_range = true;


Is blindly setting this to true reasonable, given that...


  if ((bs->open_flags & BDRV_O_NOCACHE) != 0) {
  s->needs_alignment = true;
  }
@@ -1365,10 +1367,6 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData 
*aiocb)
  int64_t len;
  #endif
  
-if (aiocb->aio_type & QEMU_AIO_BLKDEV) {

-return handle_aiocb_write_zeroes_block(aiocb);
-}
-
  #ifdef CONFIG_XFS
  if (s->is_xfs) {
  return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
@@ -1376,16 +1374,25 @@ static ssize_t 
handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
  #endif
  
  #ifdef CONFIG_FALLOCATE_ZERO_RANGE

-if (s->has_write_zeroes) {


...later use is guarded by something learned at compile time?


+/* since linux 4.9, block device supports fallocate. kernel issues


s/since/Since/
s/device supports/devices support/
s/kernel issues/The kernel issues a/


+ * block device zereout request and invalidates page cache. So
+ * ioctl(fd, FALLOC_FL_ZERO_RANGE...) is safer than ioctl(fd,


Same comment as on commit message; this looks like you meant fallocate 
rather than ioctl on one of the two uses.



+ * BLKZEROOUT...). try to call do_fallocate, if failing, fallback.


s/try/Try/
s/if failing, fallback/and fall back if that fails/


+ */
+if (s->has_fallocate_zero_range) {
  int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
 aiocb->aio_offset, aiocb->aio_nbytes);
-if (ret == 0 || ret != -ENOTSUP) {
+if (ret == 0) {
  return ret;
-}
-s->has_write_zeroes = false;
+} else if (ret == -ENOTSUP)
+s->has_fallocate_zero_range = false;
  }


Before your patch, if we get any failure other than -ENOTSUP, we exit 
immediately rather than attempting a fallback.  Your code breaks that 
paradigm, and blindly attempts the fallback even when the failure was 
something like EIO.



  #endif
  
+if (aiocb->aio_type & QEMU_AIO_BLKDEV) {

+return handle_aiocb_write_zeroes_block(aiocb);
+}
+
  #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
  if (s->has_discard && s->has_fallocate) {
  int ret = do_fallocate(s->fd,



--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



Re: [Qemu-devel] [PATCH for-2.12 v4] iotests: Test abnormally large size in compressed cluster descriptor

2018-03-28 Thread Max Reitz
On 2018-03-22 13:41, Alberto Garcia wrote:
> L2 entries for compressed clusters have a field that indicates the
> number of sectors used to store the data in the image.
> 
> That's however not the size of the compressed data itself, just the
> number of sectors where that data is located. The actual data size is
> usually not a multiple of the sector size, and therefore cannot be
> represented with this field.
> 
> The way it works is that QEMU reads all the specified sectors and
> starts decompressing the data until there's enough to recover the
> original uncompressed cluster. If there are any bytes left that
> haven't been decompressed they are simply ignored.
> 
> One consequence of this is that even if the size field is larger than
> it needs to be QEMU can handle it just fine: it will read more data
> from disk but it will ignore the extra bytes.
> 
> This test creates an image with two compressed clusters that use 5
> sectors (2.5 KB) each, increases the size field to the maximum (8192
> sectors, or 4 MB) and verifies that the data can be read without
> problems.
> 
> This test is important because while the decompressed data takes
> exactly one cluster, the maximum value allowed in the compressed size
> field is twice the cluster size. So although QEMU won't produce images
> with such large values we need to make sure that it can handle them.
> 
> Another effect of increasing the size field is that it can make
> it include data from the following host cluster(s). In this case
> 'qemu-img check' will detect that the refcounts are not correct, and
> we'll need to rebuild them.
> 
> Additionally, this patch also tests that decreasing the size corrupts
> the image since the original data can no longer be recovered. In this
> case QEMU returns an error when trying to read the compressed data,
> but 'qemu-img check' doesn't see anything wrong if the refcounts are
> consistent.
> 
> One possible task for the future is to make 'qemu-img check' verify
> the sizes of the compressed clusters, by trying to decompress the data
> and checking that the size stored in the L2 entry is correct.
> 
> Signed-off-by: Alberto Garcia 
> Reviewed-by: Eric Blake 
> ---
> v4: Resend for 2.12
> 
> v3: Add TODO comment, as suggested by Eric.
> 
> Corrupt the length of the second compressed cluster as well so the
> uncompressed data would span three host clusters.
> 
> v2: We now have two scenarios where we make QEMU read data from the
> next host cluster and from beyond the end of the image. This
> version also runs qemu-img check on the corrupted image.
> 
> If the size field is too small, reading fails but qemu-img check
> succeeds.
> 
> If the size field is too large, reading succeeds but qemu-img
> check fails (this can be repaired, though).
> ---
>  tests/qemu-iotests/122 | 45 +
>  tests/qemu-iotests/122.out | 31 +++
>  2 files changed, 76 insertions(+)
> 
> diff --git a/tests/qemu-iotests/122 b/tests/qemu-iotests/122
> index 45b359c2ba..5b9593016c 100755
> --- a/tests/qemu-iotests/122
> +++ b/tests/qemu-iotests/122

Not sure if 122 is the right file for this...

Or, let me rephrase, it does look to me like it is not the right file.
But on the other hand, I don't see a more suitable file.

> @@ -130,6 +130,51 @@ $QEMU_IO -c "read -P 01024k 1022k" "$TEST_IMG" 2>&1 
> | _filter_qemu_io | _fil
>  
>  
>  echo
> +echo "=== Corrupted size field in compressed cluster descriptor ==="
> +echo
> +# Create an empty image, fill half of it with data and compress it.
> +# The L2 entries of the two compressed clusters are located at
> +# 0x80 and 0x88, their original values are 0x400800a0
> +# and 0x400800a00802 (5 sectors for compressed data each).
> +TEST_IMG="$TEST_IMG".1 _make_test_img 8M
> +$QEMU_IO -c "write -P 0x11 0 4M" "$TEST_IMG".1 2>&1 | _filter_qemu_io | 
> _filter_testdir
> +$QEMU_IMG convert -c -O qcow2 -o cluster_size=2M "$TEST_IMG".1 "$TEST_IMG"

Why not just use "write -c" and drop the convert?  (You'd have to use
two writes, though, one for each cluster.)

> +
> +# Reduce size of compressed data to 4 sectors: this corrupts the image.
> +poke_file "$TEST_IMG" $((0x80)) "\x40\x06"
> +$QEMU_IO -c "read  -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | 
> _filter_testdir
> +
> +# 'qemu-img check' however doesn't see anything wrong because it
> +# doesn't try to decompress the data and the refcounts are consistent.
> +# TODO: update qemu-img so this can be detected
> +_check_test_img
> +
> +# Increase size of compressed data to the maximum (8192 sectors).
> +# This makes QEMU read more data (8192 sectors instead of 5, host
> +# addresses [0xa0, 0xdf]), but the decompression algorithm
> +# stops once we have enough to restore the uncompressed cluster, so
> +# the rest of the data is ignored.
> +poke_file "$TEST_IMG" $((0x80)) "\x7f\xfe"
> 

[Qemu-devel] [PATCH 1/4] scripts/qemugdb: get pthread_self from "info threads" command

2018-03-28 Thread Vladimir Sementsov-Ogievskiy
When debugging a coredump, pthread_self can't be obtained from
function arch_prctl. Moreover if qemu crashed in coroutine, we
can't find 'start_thread' in current stack-trace. So, add a method,
actually proposed in 1138f24645e9e, which should work for gdb
version >= 7.3.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 scripts/qemugdb/coroutine.py | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/scripts/qemugdb/coroutine.py b/scripts/qemugdb/coroutine.py
index ab699794ab..ffaa45c464 100644
--- a/scripts/qemugdb/coroutine.py
+++ b/scripts/qemugdb/coroutine.py
@@ -14,6 +14,7 @@
 # GNU GPL, version 2 or (at your option) any later version.
 
 import gdb
+import re
 
 VOID_PTR = gdb.lookup_type('void').pointer()
 
@@ -28,7 +29,17 @@ def get_fs_base():
 return fs_base
 
 def pthread_self():
-'''Fetch pthread_self() from the glibc start_thread function.'''
+# Try read pthread_self from gdb command 'info threads'.
+# Will fail for old gdb.
+try:
+threads = gdb.execute('info threads', False, True)
+m = re.search('^\* 1Thread (0x[0-9a-f]+)', threads, re.MULTILINE)
+return int(m.group(1), 16)
+except TypeError:
+# gdb doesn't support third parameter for execute
+pass
+
+# Try fetch pthread_self() from the glibc start_thread function.
 f = gdb.newest_frame()
 while f.name() != 'start_thread':
 f = f.older()
-- 
2.11.1




[Qemu-devel] [PATCH 4/4] scripts/qemugdb: backtraces for coroutines in coredump

2018-03-28 Thread Vladimir Sementsov-Ogievskiy
We can't get a coroutine backtrace in the obvious way
 - set regs
 - bt
 - restore regs
when debugging a coredump.
So, let's go the hard way: clone the current coredump file, patch the regs
in it and run gdb in a subprocess to get a backtrace from this
patched coredump.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 scripts/qemugdb/coroutine.py | 26 --
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/scripts/qemugdb/coroutine.py b/scripts/qemugdb/coroutine.py
index 7070a592f3..2a05851e24 100644
--- a/scripts/qemugdb/coroutine.py
+++ b/scripts/qemugdb/coroutine.py
@@ -15,9 +15,32 @@
 
 import gdb
 import re
+import tempfile
+import subprocess
+import os
+import coredump
 
 VOID_PTR = gdb.lookup_type('void').pointer()
 
+def bt_regs_static(regs):
+files = gdb.execute('info files', False, True).split('\n')
+executable = re.match('^Symbols from "(.*)".$', files[0]).group(1)
+dump = re.search("`(.*)'", files[2]).group(1)
+
+with tempfile.NamedTemporaryFile(dir='/tmp', delete=False) as f:
+temp = f.name
+
+coredump.clone_coredump(dump, temp, regs)
+
+cmd = ['gdb', '-batch', '-ex', "python print 'split'",
+   '-ex', 'bt', executable, temp]
+out = subprocess.check_output(cmd)
+out = out.split('split')[1]
+
+os.remove(temp)
+
+print out
+
 def get_fs_base():
 '''Fetch %fs base value using arch_prctl(ARCH_GET_FS).  This is
pthread_self().'''
@@ -122,8 +145,7 @@ class CoroutineCommand(gdb.Command):
 try:
 bt_regs(regs)
 except gdb.error:
-print "Coroutine backtrace can't be obtained without " \
-  "a process to debug."
+bt_regs_static(regs)
 
 class CoroutineSPFunction(gdb.Function):
 def __init__(self):
-- 
2.11.1




[Qemu-devel] [PATCH 0/4] qemugdb: coroutine backtrace for coredump

2018-03-28 Thread Vladimir Sementsov-Ogievskiy
Hi all. Here are some qemugdb enhancements around qemu coroutine command.
The main feature is a backtrace for coroutine, when debugging with a
coredump file.

The problem is that we can't get a coroutine backtrace in the obvious way
 - set regs
 - bt
 - restore regs
when debugging a coredump.
So, let's go the hard way: clone the current coredump file, patch the regs
in it and run gdb in a subprocess to get a backtrace from this
patched coredump.

Vladimir Sementsov-Ogievskiy (4):
  scripts/qemugdb: get pthread_self from "info threads" command
  scripts/qemugdb: improve "qemu coroutine" command
  scripts/qemugdb: add coredump.py for coredump patching
  scripts/qemugdb: backtraces for coroutines in coredump

 scripts/qemugdb/coredump.py  | 51 ++
 scripts/qemugdb/coroutine.py | 53 +++-
 2 files changed, 99 insertions(+), 5 deletions(-)
 create mode 100644 scripts/qemugdb/coredump.py

-- 
2.11.1




[Qemu-devel] [PATCH 3/4] scripts/qemugdb: add coredump.py for coredump patching

2018-03-28 Thread Vladimir Sementsov-Ogievskiy
The main function is write_regs_to_coredump, which opens coredump
file, searches for 'CORE' sign. The first one should correspond
to PRSTATUS note for first thread. Patch register values in
elf_prstatus structure, going after header with 'CORE' sign.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 scripts/qemugdb/coredump.py | 51 +
 1 file changed, 51 insertions(+)
 create mode 100644 scripts/qemugdb/coredump.py

diff --git a/scripts/qemugdb/coredump.py b/scripts/qemugdb/coredump.py
new file mode 100644
index 00..8915461886
--- /dev/null
+++ b/scripts/qemugdb/coredump.py
@@ -0,0 +1,51 @@
+# Coredump patching
+#
+# Copyright (c) 2018 Virtuozzo International GmbH. All rights reserved.
+#
+# Authors:
+#  Vladimir Sementsov-Ogievskiy 
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import struct
+import shutil
+
+def write_regs_to_coredump(fname, set_regs):
+# asm/ptrace.h
+pt_regs = ['r15', 'r14', 'r13', 'r12', 'rbp', 'rbx', 'r11', 'r10',
+   'r9', 'r8', 'rax', 'rcx', 'rdx', 'rsi', 'rdi', 'orig_rax',
+   'rip', 'cs', 'eflags', 'rsp', 'ss']
+
+with open(fname, 'r+b') as f:
+print 'patching core file "%s"' % fname
+
+while f.read(4) != 'CORE':
+pass
+
+print 'found "CORE" at 0x%x' % f.tell()
+f.seek(4, 1) # go to elf_prstatus
+f.seek(112, 1) # offsetof(struct elf_prstatus, pr_reg)
+
+print 'assume pt_regs at 0x%x' % f.tell()
+for reg in pt_regs:
+if reg in set_regs:
+print 'write %s at 0x%x' % (reg, f.tell())
+f.write(struct.pack('q', set_regs[reg]))
+else:
+f.seek(8, 1)
+
+def clone_coredump(source, target, set_regs):
+shutil.copyfile(source, target)
+write_regs_to_coredump(target, set_regs)
-- 
2.11.1




[Qemu-devel] [PATCH 2/4] scripts/qemugdb: improve "qemu coroutine" command

2018-03-28 Thread Vladimir Sementsov-Ogievskiy
 - print regs
 - catch exception for coredump debugging

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 scripts/qemugdb/coroutine.py | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/scripts/qemugdb/coroutine.py b/scripts/qemugdb/coroutine.py
index ffaa45c464..7070a592f3 100644
--- a/scripts/qemugdb/coroutine.py
+++ b/scripts/qemugdb/coroutine.py
@@ -80,9 +80,8 @@ def get_jmpbuf_regs(jmpbuf):
 'r15': jmpbuf[JB_R15],
 'rip': glibc_ptr_demangle(jmpbuf[JB_PC], pointer_guard) }
 
-def bt_jmpbuf(jmpbuf):
-'''Backtrace a jmpbuf'''
-regs = get_jmpbuf_regs(jmpbuf)
+def bt_regs(regs):
+'''Backtrace with specified regs'''
 old = dict()
 
 for i in regs:
@@ -113,7 +112,18 @@ class CoroutineCommand(gdb.Command):
 gdb.write('usage: qemu coroutine <coroutine-pointer>\n')
 return
 
-bt_jmpbuf(coroutine_to_jmpbuf(gdb.parse_and_eval(argv[0])))
+jmpbuf = coroutine_to_jmpbuf(gdb.parse_and_eval(argv[0]))
+regs = get_jmpbuf_regs(jmpbuf)
+for k, v in regs.iteritems():
+gdb.write('%s: 0x%x\n' %(k,v))
+
+gdb.write('\n')
+
+try:
+bt_regs(regs)
+except gdb.error:
+print "Coroutine backtrace can't be obtained without " \
+  "a process to debug."
 
 class CoroutineSPFunction(gdb.Function):
 def __init__(self):
-- 
2.11.1




Re: [Qemu-devel] [PATCH 2/2] vhost-user: back SET/GET_CONFIG requests with a protocol feature

2018-03-28 Thread Michael S. Tsirkin
On Wed, Mar 28, 2018 at 07:08:32PM +0200, Maxime Coquelin wrote:
> 
> 
> On 03/28/2018 06:55 PM, Michael S. Tsirkin wrote:
> > On Wed, Mar 28, 2018 at 05:56:57PM +0200, Maxime Coquelin wrote:
> > > Without a dedicated protocol feature, QEMU cannot know whether
> > > the backend can handle VHOST_USER_SET_CONFIG and
> > > VHOST_USER_GET_CONFIG messages.
> > > 
> > > This patch adds a protocol feature that is only advertised by
> > > QEMU if the device implements the config ops. The backend
> > > should only send VHOST_USER_SLAVE_CONFIG_CHANGE_MSG requests
> > > if the protocol feature has been negotiated.
> > > 
> > > Signed-off-by: Maxime Coquelin 
> > 
> > I presume vhost user blk should fail init if the
> > protocol feature isn't negotiated then.
> 
> I did that and finally removed it.
> In the future, if for example we add config support for net device,
> we will want init to succeed even with old backend version that
> does not support it, right?
> 
> For the vhost user blk case, its init will fail right after,
> because it tries to get config, but will get an error instead.
> 
> As we only have vhost-user-blk supporting it for now, and since it
> is a mandatory feature, I am fine with posting a v2 that makes
> vhost_user_init() fail.

Seems safer. We can remove restrictions but not add new ones.

> > 
> > > ---
> > >   docs/interop/vhost-user.txt | 21 -
> > >   hw/virtio/vhost-user.c  | 17 +
> > >   2 files changed, 29 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
> > > index c058c407df..534caab18a 100644
> > > --- a/docs/interop/vhost-user.txt
> > > +++ b/docs/interop/vhost-user.txt
> > > @@ -379,6 +379,7 @@ Protocol features
> > >   #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN   6
> > >   #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
> > >   #define VHOST_USER_PROTOCOL_F_PAGEFAULT  8
> > > +#define VHOST_USER_PROTOCOL_F_CONFIG 9
> > >   Master message types
> > >   
> > > @@ -664,7 +665,8 @@ Master message types
> > > Master payload: virtio device config space
> > > Slave payload: virtio device config space
> > > -  Submitted by the vhost-user master to fetch the contents of the 
> > > virtio
> > > +  When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, this message is
> > > +  submitted by the vhost-user master to fetch the contents of the 
> > > virtio
> > > device configuration space, vhost-user slave's payload size MUST 
> > > match
> > > master's request, vhost-user slave uses zero length of payload to
> > > indicate an error to vhost-user master. The vhost-user master may
> > > @@ -677,7 +679,8 @@ Master message types
> > > Master payload: virtio device config space
> > > Slave payload: N/A
> > > -  Submitted by the vhost-user master when the Guest changes the 
> > > virtio
> > > +  When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, this message is
> > > +  submitted by the vhost-user master when the Guest changes the 
> > > virtio
> > > device configuration space and also can be used for live migration
> > > on the destination host. The vhost-user slave must check the flags
> > > field, and slaves MUST NOT accept SET_CONFIG for read-only
> > > @@ -766,13 +769,13 @@ Slave message types
> > >Slave payload: N/A
> > >Master payload: N/A
> > > - Vhost-user slave sends such messages to notify that the virtio 
> > > device's
> > > - configuration space has changed, for those host devices which can 
> > > support
> > > - such feature, host driver can send VHOST_USER_GET_CONFIG message to 
> > > slave
> > > - to get the latest content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is
> > > - negotiated, and slave set the VHOST_USER_NEED_REPLY flag, master 
> > > must
> > > - respond with zero when operation is successfully completed, or 
> > > non-zero
> > > - otherwise.
> > > + When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, vhost-user slave 
> > > sends
> > > + such messages to notify that the virtio device's configuration 
> > > space has
> > > + changed, for those host devices which can support such feature, host
> > > + driver can send VHOST_USER_GET_CONFIG message to slave to get the 
> > > latest
> > > + content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, and 
> > > slave set
> > > + the VHOST_USER_NEED_REPLY flag, master must respond with zero when
> > > + operation is successfully completed, or non-zero otherwise.
> > >   VHOST_USER_PROTOCOL_F_REPLY_ACK:
> > >   ---
> > > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > > index 44aea5c0a8..a045203b26 100644
> > > --- a/hw/virtio/vhost-user.c
> > > +++ b/hw/virtio/vhost-user.c
> > > @@ -46,6 +46,7 @@ enum VhostUserProtocolFeature {
> > >   VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 

Re: [Qemu-devel] [PATCH v2 1/1] blockjob: leak fix, remove from txn when failing early

2018-03-28 Thread John Snow


On 03/28/2018 10:28 AM, Marc-André Lureau wrote:
> On Wed, Mar 28, 2018 at 4:09 PM, Jeff Cody  wrote:
>> From: Marc-André Lureau 
>>
>> This fixes leaks found by ASAN such as:
>>   GTESTER tests/test-blockjob
>> =
>> ==31442==ERROR: LeakSanitizer: detected memory leaks
>>
>> Direct leak of 24 byte(s) in 1 object(s) allocated from:
>> #0 0x7f88483cba38 in __interceptor_calloc (/lib64/libasan.so.4+0xdea38)
>> #1 0x7f8845e1bd77 in g_malloc0 ../glib/gmem.c:129
>> #2 0x7f8845e1c04b in g_malloc0_n ../glib/gmem.c:360
>> #3 0x5584d2732498 in block_job_txn_new 
>> /home/elmarco/src/qemu/blockjob.c:172
>> #4 0x5584d2739b28 in block_job_create 
>> /home/elmarco/src/qemu/blockjob.c:973
>> #5 0x5584d270ae31 in mk_job 
>> /home/elmarco/src/qemu/tests/test-blockjob.c:34
>> #6 0x5584d270b1c1 in do_test_id 
>> /home/elmarco/src/qemu/tests/test-blockjob.c:57
>> #7 0x5584d270b65c in test_job_ids 
>> /home/elmarco/src/qemu/tests/test-blockjob.c:118
>> #8 0x7f8845e40b69 in test_case_run ../glib/gtestutils.c:2255
>> #9 0x7f8845e40f29 in g_test_run_suite_internal ../glib/gtestutils.c:2339
>> #10 0x7f8845e40fd2 in g_test_run_suite_internal ../glib/gtestutils.c:2351
>> #11 0x7f8845e411e9 in g_test_run_suite ../glib/gtestutils.c:2426
>> #12 0x7f8845e3fe72 in g_test_run ../glib/gtestutils.c:1692
>> #13 0x5584d270d6e2 in main 
>> /home/elmarco/src/qemu/tests/test-blockjob.c:377
>> #14 0x7f8843641f29 in __libc_start_main (/lib64/libc.so.6+0x20f29)
>>
>> Add an assert to make sure that the job doesn't have associated txn before 
>> free().
>>
>> [Jeff Cody: N.B., used updated patch provided by John Snow]
> 
> Looks good to me, so :)
> Signed-off-by: Marc-André Lureau 
> 
> thanks

Thanks for catching it!



Re: [Qemu-devel] [PATCH 2/2] vhost-user: back SET/GET_CONFIG requests with a protocol feature

2018-03-28 Thread Maxime Coquelin



On 03/28/2018 06:55 PM, Michael S. Tsirkin wrote:

On Wed, Mar 28, 2018 at 05:56:57PM +0200, Maxime Coquelin wrote:

Without a dedicated protocol feature, QEMU cannot know whether
the backend can handle VHOST_USER_SET_CONFIG and
VHOST_USER_GET_CONFIG messages.

This patch adds a protocol feature that is only advertised by
QEMU if the device implements the config ops. The backend
should only send VHOST_USER_SLAVE_CONFIG_CHANGE_MSG requests
if the protocol feature has been negotiated.

Signed-off-by: Maxime Coquelin 


I presume vhost user blk should fail init if the
protocol feature isn't negotiated then.


I did that and finally removed it.
In the future, if for example we add config support for net device,
we will want init to succeed even with old backend version that
does not support it, right?

For the vhost user blk case, its init will fail right after,
because it tries to get config, but will get an error instead.

As we only have vhost-user-blk supporting it for now, and since it
is a mandatory feature, I am fine with posting a v2 that makes
vhost_user_init() fail.




---
  docs/interop/vhost-user.txt | 21 -
  hw/virtio/vhost-user.c  | 17 +
  2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
index c058c407df..534caab18a 100644
--- a/docs/interop/vhost-user.txt
+++ b/docs/interop/vhost-user.txt
@@ -379,6 +379,7 @@ Protocol features
  #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN   6
  #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
  #define VHOST_USER_PROTOCOL_F_PAGEFAULT  8
+#define VHOST_USER_PROTOCOL_F_CONFIG 9
  
  Master message types

  
@@ -664,7 +665,8 @@ Master message types
Master payload: virtio device config space
Slave payload: virtio device config space
  
-  Submitted by the vhost-user master to fetch the contents of the virtio

+  When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, this message is
+  submitted by the vhost-user master to fetch the contents of the virtio
device configuration space, vhost-user slave's payload size MUST match
master's request, vhost-user slave uses zero length of payload to
indicate an error to vhost-user master. The vhost-user master may
@@ -677,7 +679,8 @@ Master message types
Master payload: virtio device config space
Slave payload: N/A
  
-  Submitted by the vhost-user master when the Guest changes the virtio

+  When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, this message is
+  submitted by the vhost-user master when the Guest changes the virtio
device configuration space and also can be used for live migration
on the destination host. The vhost-user slave must check the flags
field, and slaves MUST NOT accept SET_CONFIG for read-only
@@ -766,13 +769,13 @@ Slave message types
   Slave payload: N/A
   Master payload: N/A
  
- Vhost-user slave sends such messages to notify that the virtio device's

- configuration space has changed, for those host devices which can support
- such feature, host driver can send VHOST_USER_GET_CONFIG message to slave
- to get the latest content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is
- negotiated, and slave set the VHOST_USER_NEED_REPLY flag, master must
- respond with zero when operation is successfully completed, or non-zero
- otherwise.
+ When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, vhost-user slave sends
+ such messages to notify that the virtio device's configuration space has
+ changed, for those host devices which can support such feature, host
+ driver can send VHOST_USER_GET_CONFIG message to slave to get the latest
+ content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, and slave set
+ the VHOST_USER_NEED_REPLY flag, master must respond with zero when
+ operation is successfully completed, or non-zero otherwise.
  
  VHOST_USER_PROTOCOL_F_REPLY_ACK:

  ---
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 44aea5c0a8..a045203b26 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -46,6 +46,7 @@ enum VhostUserProtocolFeature {
  VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
  VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
  VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
+VHOST_USER_PROTOCOL_F_CONFIG = 9,
  VHOST_USER_PROTOCOL_F_MAX
  };
  
@@ -1211,6 +1212,12 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque)
  
  dev->protocol_features =

  protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
+
+if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
+/* Dont acknowledge CONFIG feature if device doesn't support it */
+dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+}
+
  err = 

[Qemu-devel] [PATCH] migration: Don't activate block devices if using -S

2018-03-28 Thread Dr. David Alan Gilbert (git)
From: "Dr. David Alan Gilbert" 

Activating the block devices causes the locks to be taken on
the backing file.  If we're running with -S and the destination libvirt
hasn't started the destination with 'cont', it's expecting the locks are
still untaken.

Don't activate the block devices if we're not going to autostart the VM;
'cont' already will do that anyway.

bz: https://bugzilla.redhat.com/show_bug.cgi?id=1560854
Signed-off-by: Dr. David Alan Gilbert 
---
 migration/migration.c | 22 +++---
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 52a5092add..58bd382730 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -306,13 +306,21 @@ static void process_incoming_migration_bh(void *opaque)
 Error *local_err = NULL;
 MigrationIncomingState *mis = opaque;
 
-/* Make sure all file formats flush their mutable metadata.
- * If we get an error here, just don't restart the VM yet. */
-bdrv_invalidate_cache_all(&local_err);
-if (local_err) {
-error_report_err(local_err);
-local_err = NULL;
-autostart = false;
+/* Only fire up the block code now if we're going to restart the
+ * VM, else 'cont' will do it.
+ * This causes file locking to happen; so we don't want it to happen
+ * unless we really are starting the VM.
+ */
+if (autostart && (!global_state_received() ||
+global_state_get_runstate() == RUN_STATE_RUNNING)) {
+/* Make sure all file formats flush their mutable metadata.
+ * If we get an error here, just don't restart the VM yet. */
+bdrv_invalidate_cache_all(&local_err);
+if (local_err) {
+error_report_err(local_err);
+local_err = NULL;
+autostart = false;
+}
 }
 
 /*
-- 
2.14.3




Re: [Qemu-devel] [PATCH 2/2] vhost-user: back SET/GET_CONFIG requests with a protocol feature

2018-03-28 Thread Michael S. Tsirkin
On Wed, Mar 28, 2018 at 05:56:57PM +0200, Maxime Coquelin wrote:
> Without a dedicated protocol feature, QEMU cannot know whether
> the backend can handle VHOST_USER_SET_CONFIG and
> VHOST_USER_GET_CONFIG messages.
> 
> This patch adds a protocol feature that is only advertised by
> QEMU if the device implements the config ops. The backend
> should only send VHOST_USER_SLAVE_CONFIG_CHANGE_MSG requests
> if the protocol feature has been negotiated.
> 
> Signed-off-by: Maxime Coquelin 

I presume vhost user blk should fail init if the
protocol feature isn't negotiated then.

> ---
>  docs/interop/vhost-user.txt | 21 -
>  hw/virtio/vhost-user.c  | 17 +
>  2 files changed, 29 insertions(+), 9 deletions(-)
> 
> diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
> index c058c407df..534caab18a 100644
> --- a/docs/interop/vhost-user.txt
> +++ b/docs/interop/vhost-user.txt
> @@ -379,6 +379,7 @@ Protocol features
>  #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN   6
>  #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
>  #define VHOST_USER_PROTOCOL_F_PAGEFAULT  8
> +#define VHOST_USER_PROTOCOL_F_CONFIG 9
>  
>  Master message types
>  
> @@ -664,7 +665,8 @@ Master message types
>Master payload: virtio device config space
>Slave payload: virtio device config space
>  
> -  Submitted by the vhost-user master to fetch the contents of the virtio
> +  When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, this message is
> +  submitted by the vhost-user master to fetch the contents of the virtio
>device configuration space, vhost-user slave's payload size MUST match
>master's request, vhost-user slave uses zero length of payload to
>indicate an error to vhost-user master. The vhost-user master may
> @@ -677,7 +679,8 @@ Master message types
>Master payload: virtio device config space
>Slave payload: N/A
>  
> -  Submitted by the vhost-user master when the Guest changes the virtio
> +  When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, this message is
> +  submitted by the vhost-user master when the Guest changes the virtio
>device configuration space and also can be used for live migration
>on the destination host. The vhost-user slave must check the flags
>field, and slaves MUST NOT accept SET_CONFIG for read-only
> @@ -766,13 +769,13 @@ Slave message types
>   Slave payload: N/A
>   Master payload: N/A
>  
> - Vhost-user slave sends such messages to notify that the virtio device's
> - configuration space has changed, for those host devices which can 
> support
> - such feature, host driver can send VHOST_USER_GET_CONFIG message to 
> slave
> - to get the latest content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is
> - negotiated, and slave set the VHOST_USER_NEED_REPLY flag, master must
> - respond with zero when operation is successfully completed, or non-zero
> - otherwise.
> + When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, vhost-user slave sends
> + such messages to notify that the virtio device's configuration space has
> + changed, for those host devices which can support such feature, host
> + driver can send VHOST_USER_GET_CONFIG message to slave to get the latest
> + content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, and slave set
> + the VHOST_USER_NEED_REPLY flag, master must respond with zero when
> + operation is successfully completed, or non-zero otherwise.
>  
>  VHOST_USER_PROTOCOL_F_REPLY_ACK:
>  ---
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index 44aea5c0a8..a045203b26 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -46,6 +46,7 @@ enum VhostUserProtocolFeature {
>  VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
>  VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
>  VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
> +VHOST_USER_PROTOCOL_F_CONFIG = 9,
>  VHOST_USER_PROTOCOL_F_MAX
>  };
>  
> @@ -1211,6 +1212,12 @@ static int vhost_user_init(struct vhost_dev *dev, void 
> *opaque)
>  
>  dev->protocol_features =
>  protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
> +
> +if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) 
> {
> +/* Dont acknowledge CONFIG feature if device doesn't support it 
> */
> +dev->protocol_features &= ~(1ULL << 
> VHOST_USER_PROTOCOL_F_CONFIG);
> +}
> +
>  err = vhost_user_set_protocol_features(dev, dev->protocol_features);
>  if (err < 0) {
>  return err;
> @@ -1405,6 +1412,11 @@ static int vhost_user_get_config(struct vhost_dev 
> *dev, uint8_t *config,
>  .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
>  };
>  
> +if (!virtio_has_feature(dev->protocol_features,
> +

[Qemu-devel] [PATCH v2 1/6] e1000: Convert v3 fields to subsection

2018-03-28 Thread Dr. David Alan Gilbert (git)
From: "Dr. David Alan Gilbert" 

A bunch of new TSO fields were introduced by d62644b4 and this bumped
the VMState version; however it's easier for those trying to keep
backwards migration compatibility if these fields are added in a
subsection instead.

Move the new fields to a subsection.

Since this was added after 2.11, this change will only affect
compatibility with 2.12-rc0.

Signed-off-by: Dr. David Alan Gilbert 
---
 hw/net/e1000.c | 34 ++
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index c7f1695f57..24e9a4ab40 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -1433,9 +1433,29 @@ static const VMStateDescription 
vmstate_e1000_full_mac_state = {
 }
 };
 
+static const VMStateDescription vmstate_e1000_tx_tso_state = {
+.name = "e1000/tx_tso_state",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (VMStateField[]) {
+VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
+VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
+VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
+VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
+VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
+VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
+VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
+VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
+VMSTATE_UINT16(tx.tso_props.mss, E1000State),
+VMSTATE_INT8(tx.tso_props.ip, E1000State),
+VMSTATE_INT8(tx.tso_props.tcp, E1000State),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const VMStateDescription vmstate_e1000 = {
 .name = "e1000",
-.version_id = 3,
+.version_id = 2,
 .minimum_version_id = 1,
 .pre_save = e1000_pre_save,
 .post_load = e1000_post_load,
@@ -1508,22 +1528,12 @@ static const VMStateDescription vmstate_e1000 = {
 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
-VMSTATE_UINT8_V(tx.tso_props.ipcss, E1000State, 3),
-VMSTATE_UINT8_V(tx.tso_props.ipcso, E1000State, 3),
-VMSTATE_UINT16_V(tx.tso_props.ipcse, E1000State, 3),
-VMSTATE_UINT8_V(tx.tso_props.tucss, E1000State, 3),
-VMSTATE_UINT8_V(tx.tso_props.tucso, E1000State, 3),
-VMSTATE_UINT16_V(tx.tso_props.tucse, E1000State, 3),
-VMSTATE_UINT32_V(tx.tso_props.paylen, E1000State, 3),
-VMSTATE_UINT8_V(tx.tso_props.hdr_len, E1000State, 3),
-VMSTATE_UINT16_V(tx.tso_props.mss, E1000State, 3),
-VMSTATE_INT8_V(tx.tso_props.ip, E1000State, 3),
-VMSTATE_INT8_V(tx.tso_props.tcp, E1000State, 3),
 VMSTATE_END_OF_LIST()
 },
 .subsections = (const VMStateDescription*[]) {
 &vmstate_e1000_mit_state,
 &vmstate_e1000_full_mac_state,
+&vmstate_e1000_tx_tso_state,
 NULL
 }
 };
-- 
2.14.3




[Qemu-devel] [PATCH] target/xtensa: fix flush_window_regs

2018-03-28 Thread Max Filippov
flush_window_regs uses wrong stack frame to save overflow registers in
call8 and call12 frames, which results in wrong register values in
callers of a function that received a signal.
Reimplement flush_window_regs closely following window overflow
sequence.

Signed-off-by: Max Filippov 
---
 linux-user/signal.c | 55 +++--
 1 file changed, 24 insertions(+), 31 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 2ea3e0321f4d..33d5ced30c98 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -7094,52 +7094,45 @@ static abi_ulong get_sigframe(struct target_sigaction 
*sa,
 
 static int flush_window_regs(CPUXtensaState *env)
 {
-const uint32_t nareg_mask = env->config->nareg - 1;
 uint32_t wb = env->sregs[WINDOW_BASE];
-uint32_t ws = (xtensa_replicate_windowstart(env) >> (wb + 1)) &
-((1 << env->config->nareg / 4) - 1);
-uint32_t d = ctz32(ws) + 1;
-uint32_t sp;
-abi_long ret = 0;
-
-wb += d;
-ws >>= d;
+uint32_t ws = xtensa_replicate_windowstart(env) >> (wb + 1);
+unsigned d = ctz32(ws) + 1;
+unsigned i;
+int ret = 0;
 
-xtensa_sync_phys_from_window(env);
-sp = env->phys_regs[(wb * 4 + 1) & nareg_mask];
+for (i = d; i < env->config->nareg / 4; i += d) {
+uint32_t ssp, osp;
+unsigned j;
 
-while (ws && ret == 0) {
-int d;
-int i;
-int idx;
+ws >>= d;
+xtensa_rotate_window(env, d);
 
 if (ws & 0x1) {
-ws >>= 1;
+ssp = env->regs[5];
 d = 1;
 } else if (ws & 0x2) {
-ws >>= 2;
+ssp = env->regs[9];
+ret |= get_user_ual(osp, env->regs[1] - 12);
+osp -= 32;
 d = 2;
-for (i = 0; i < 4; ++i) {
-idx = (wb * 4 + 4 + i) & nareg_mask;
-ret |= put_user_ual(env->phys_regs[idx], sp + (i - 12) * 4);
-}
 } else if (ws & 0x4) {
-ws >>= 3;
+ssp = env->regs[13];
+ret |= get_user_ual(osp, env->regs[1] - 12);
+osp -= 48;
 d = 3;
-for (i = 0; i < 8; ++i) {
-idx = (wb * 4 + 4 + i) & nareg_mask;
-ret |= put_user_ual(env->phys_regs[idx], sp + (i - 16) * 4);
-}
 } else {
 g_assert_not_reached();
 }
-sp = env->phys_regs[((wb + d) * 4 + 1) & nareg_mask];
-for (i = 0; i < 4; ++i) {
-idx = (wb * 4 + i) & nareg_mask;
-ret |= put_user_ual(env->phys_regs[idx], sp + (i - 4) * 4);
+
+for (j = 0; j < 4; ++j) {
+ret |= put_user_ual(env->regs[j], ssp - 16 + j * 4);
+}
+for (j = 4; j < d * 4; ++j) {
+ret |= put_user_ual(env->regs[j], osp - 16 + j * 4);
 }
-wb += d;
 }
+xtensa_rotate_window(env, d);
+g_assert(env->sregs[WINDOW_BASE] == wb);
 return ret == 0;
 }
 
-- 
2.11.0




[Qemu-devel] [PATCH v2 0/6] e1000 migration changes for 2.12

2018-03-28 Thread Dr. David Alan Gilbert (git)
From: "Dr. David Alan Gilbert" 

Hi,
  This set of patches changes the e1000 migration code to make
it easier to keep compatibility with older versions in backwards
migration.

I think the first 3 patches are fairly uncontroversial and I would like
them for 2.12; it would be nice to have the lot since changing them
after we've shipped is much more difficult.

v2
  Ed and Paolo answered my question that I asked in the cover letter;
and I think I've followed the advice - although my testing has been
very light.  The new patches do two things:
   a) When we receive a stream without the subsection we duplicate the
received props state into both props and tso_props.
   b) When we send without the subsection we decide which set to send
in the main part of the state based on which state was last changed.

Dave

Dr. David Alan Gilbert (6):
  e1000: Convert v3 fields to subsection
  e1000: Dupe offload data on reading old stream
  e1000: wire new subsection to property
  e1000: Migrate props via a temporary structure
  e1000: Choose which set of props to migrate
  e1000: Old machine types, turn new subsection off

 hw/net/e1000.c  | 103 
 include/hw/compat.h |   4 ++
 2 files changed, 84 insertions(+), 23 deletions(-)

-- 
2.14.3




[Qemu-devel] [PATCH v2 6/6] e1000: Old machine types, turn new subsection off

2018-03-28 Thread Dr. David Alan Gilbert (git)
From: "Dr. David Alan Gilbert" 

Turn the newly added subsection off for old machine types

Signed-off-by: Dr. David Alan Gilbert 
---
 include/hw/compat.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/hw/compat.h b/include/hw/compat.h
index bc9e3a6627..13242b831a 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -14,6 +14,10 @@
 .driver   = "vhost-user-blk-pci",\
 .property = "vectors",\
 .value= "2",\
+},{\
+.driver   = "e1000",\
+.property = "migrate_tso_props",\
+.value= "off",\
 },
 
 #define HW_COMPAT_2_10 \
-- 
2.14.3




[Qemu-devel] [PATCH v2 2/6] e1000: Dupe offload data on reading old stream

2018-03-28 Thread Dr. David Alan Gilbert (git)
From: "Dr. David Alan Gilbert" 

Old QEMUs only had one set of offload data;  when we only receive
one lot, dupe the received data - that should give us about the
same bug level as the old version.

Signed-off-by: Dr. David Alan Gilbert 
---
 hw/net/e1000.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 24e9a4ab40..d399ce3e4f 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -127,6 +127,7 @@ typedef struct E1000State_st {
 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
 uint32_t compat_flags;
+bool received_tx_tso;
 } E1000State;
 
 #define chkflag(x) (s->compat_flags & E1000_FLAG_##x)
@@ -1390,6 +1391,20 @@ static int e1000_post_load(void *opaque, int version_id)
   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
 }
 
+if (!s->received_tx_tso) {
+/* We received only one set of offload data (tx.props)
+ * and haven't got tx.tso_props.  The best we can do
+ * is dupe the data.
+ */
+s->tx.tso_props = s->tx.props;
+}
+return 0;
+}
+
+static int e1000_tx_tso_post_load(void *opaque, int version_id)
+{
+E1000State *s = opaque;
+s->received_tx_tso = true;
 return 0;
 }
 
@@ -1437,6 +1452,7 @@ static const VMStateDescription 
vmstate_e1000_tx_tso_state = {
 .name = "e1000/tx_tso_state",
 .version_id = 1,
 .minimum_version_id = 1,
+.post_load = e1000_tx_tso_post_load,
 .fields = (VMStateField[]) {
 VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
 VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
-- 
2.14.3




[Qemu-devel] [PATCH v2 5/6] e1000: Choose which set of props to migrate

2018-03-28 Thread Dr. David Alan Gilbert (git)
From: "Dr. David Alan Gilbert" 

When we're using the subsection we migrate both
the 'props' and 'tso_props' data; when we're not using
the subsection (to migrate to 2.11 or old machine types) we've
got to choose what to migrate in the main structure.

If we're using the subsection, migrate 'props' in the main structure.
If we're not using the subsection then migrate the last one
that changed, which gives behaviour similar to the old behaviour.

Signed-off-by: Dr. David Alan Gilbert 
---
 hw/net/e1000.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 4e606d4b2a..13a9494a8d 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -130,6 +130,7 @@ typedef struct E1000State_st {
 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
 uint32_t compat_flags;
 bool received_tx_tso;
+bool use_tso_for_migration;
 e1000x_txd_props mig_props;
 } E1000State;
 
@@ -622,9 +623,11 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
 if (dtype == E1000_TXD_CMD_DEXT) {/* context descriptor */
 if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
 e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
+s->use_tso_for_migration = 1;
 tp->tso_frames = 0;
 } else {
 e1000x_read_tx_ctx_descr(xp, &tp->props);
+s->use_tso_for_migration = 0;
 }
 return;
 } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
@@ -1366,7 +1369,20 @@ static int e1000_pre_save(void *opaque)
 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
 }
 
-s->mig_props = s->tx.props;
+/* Decide which set of props to migrate in the main structure */
+if (chkflag(TSO) || !s->use_tso_for_migration) {
+/* Either we're migrating with the extra subsection, in which
+ * case the mig_props is always 'props' OR
+ * we've not got the subsection, but 'props' was the last
+ * updated.
+ */
+s->mig_props = s->tx.props;
+} else {
+/* We're not using the subsection, and 'tso_props' was
+ * the last updated.
+ */
+s->mig_props = s->tx.tso_props;
+}
 return 0;
 }
 
-- 
2.14.3




[Qemu-devel] [PATCH v2 4/6] e1000: Migrate props via a temporary structure

2018-03-28 Thread Dr. David Alan Gilbert (git)
From: "Dr. David Alan Gilbert" 

Swing the tx.props out via a temporary structure, so in future patches
we can select what we're going to send.

Signed-off-by: Dr. David Alan Gilbert 
---
 hw/net/e1000.c | 27 +++
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index bb8ee2acb0..4e606d4b2a 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -130,6 +130,7 @@ typedef struct E1000State_st {
 #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
 uint32_t compat_flags;
 bool received_tx_tso;
+e1000x_txd_props mig_props;
 } E1000State;
 
 #define chkflag(x) (s->compat_flags & E1000_FLAG_##x)
@@ -1365,6 +1366,7 @@ static int e1000_pre_save(void *opaque)
 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
 }
 
+s->mig_props = s->tx.props;
 return 0;
 }
 
@@ -1393,12 +1395,13 @@ static int e1000_post_load(void *opaque, int version_id)
   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
 }
 
+s->tx.props = s->mig_props;
 if (!s->received_tx_tso) {
 /* We received only one set of offload data (tx.props)
  * and haven't got tx.tso_props.  The best we can do
  * is dupe the data.
  */
-s->tx.tso_props = s->tx.props;
+s->tx.tso_props = s->mig_props;
 }
 return 0;
 }
@@ -1496,20 +1499,20 @@ static const VMStateDescription vmstate_e1000 = {
 VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
 VMSTATE_UINT16(eecd_state.reading, E1000State),
 VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
-VMSTATE_UINT8(tx.props.ipcss, E1000State),
-VMSTATE_UINT8(tx.props.ipcso, E1000State),
-VMSTATE_UINT16(tx.props.ipcse, E1000State),
-VMSTATE_UINT8(tx.props.tucss, E1000State),
-VMSTATE_UINT8(tx.props.tucso, E1000State),
-VMSTATE_UINT16(tx.props.tucse, E1000State),
-VMSTATE_UINT32(tx.props.paylen, E1000State),
-VMSTATE_UINT8(tx.props.hdr_len, E1000State),
-VMSTATE_UINT16(tx.props.mss, E1000State),
+VMSTATE_UINT8(mig_props.ipcss, E1000State),
+VMSTATE_UINT8(mig_props.ipcso, E1000State),
+VMSTATE_UINT16(mig_props.ipcse, E1000State),
+VMSTATE_UINT8(mig_props.tucss, E1000State),
+VMSTATE_UINT8(mig_props.tucso, E1000State),
+VMSTATE_UINT16(mig_props.tucse, E1000State),
+VMSTATE_UINT32(mig_props.paylen, E1000State),
+VMSTATE_UINT8(mig_props.hdr_len, E1000State),
+VMSTATE_UINT16(mig_props.mss, E1000State),
 VMSTATE_UINT16(tx.size, E1000State),
 VMSTATE_UINT16(tx.tso_frames, E1000State),
 VMSTATE_UINT8(tx.sum_needed, E1000State),
-VMSTATE_INT8(tx.props.ip, E1000State),
-VMSTATE_INT8(tx.props.tcp, E1000State),
+VMSTATE_INT8(mig_props.ip, E1000State),
+VMSTATE_INT8(mig_props.tcp, E1000State),
 VMSTATE_BUFFER(tx.header, E1000State),
 VMSTATE_BUFFER(tx.data, E1000State),
 VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
-- 
2.14.3




[Qemu-devel] [PATCH v2 3/6] e1000: wire new subsection to property

2018-03-28 Thread Dr. David Alan Gilbert (git)
From: "Dr. David Alan Gilbert" 

Wire the new subsection from the previous commit to a property
so we can turn it off easily.

Signed-off-by: Dr. David Alan Gilbert 
---
 hw/net/e1000.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index d399ce3e4f..bb8ee2acb0 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -123,9 +123,11 @@ typedef struct E1000State_st {
 #define E1000_FLAG_AUTONEG_BIT 0
 #define E1000_FLAG_MIT_BIT 1
 #define E1000_FLAG_MAC_BIT 2
+#define E1000_FLAG_TSO_BIT 3
 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
 #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
+#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
 uint32_t compat_flags;
 bool received_tx_tso;
 } E1000State;
@@ -1422,6 +1424,13 @@ static bool e1000_full_mac_needed(void *opaque)
 return chkflag(MAC);
 }
 
+static bool e1000_tso_state_needed(void *opaque)
+{
+E1000State *s = opaque;
+
+return chkflag(TSO);
+}
+
 static const VMStateDescription vmstate_e1000_mit_state = {
 .name = "e1000/mit_state",
 .version_id = 1,
@@ -1452,6 +1461,7 @@ static const VMStateDescription 
vmstate_e1000_tx_tso_state = {
 .name = "e1000/tx_tso_state",
 .version_id = 1,
 .minimum_version_id = 1,
+.needed = e1000_tso_state_needed,
 .post_load = e1000_tx_tso_post_load,
 .fields = (VMStateField[]) {
 VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
@@ -1677,6 +1687,8 @@ static Property e1000_properties[] = {
 compat_flags, E1000_FLAG_MIT_BIT, true),
 DEFINE_PROP_BIT("extra_mac_registers", E1000State,
 compat_flags, E1000_FLAG_MAC_BIT, true),
+DEFINE_PROP_BIT("migrate_tso_props", E1000State,
+compat_flags, E1000_FLAG_TSO_BIT, true),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.14.3




Re: [Qemu-devel] [PATCH 02/16] qht: return existing entry when qht_insert fails

2018-03-28 Thread Alex Bennée

Emilio G. Cota  writes:

> The meaning of "existing" is now changed to "matches in hash and
> ht->cmp result". This is saner than just checking the pointer value.
>
> Note that we now return NULL on insertion success, or the existing
> pointer on failure. We can do this because NULL pointers are not
> allowed to be inserted in QHT.
>
> Suggested-by: Richard Henderson 
> Signed-off-by: Emilio G. Cota 
> ---
>  include/qemu/qht.h |  7 ---
>  tests/qht-bench.c  |  4 ++--
>  tests/test-qht.c   |  5 -
>  util/qht.c | 17 +
>  4 files changed, 19 insertions(+), 14 deletions(-)
>
> diff --git a/include/qemu/qht.h b/include/qemu/qht.h
> index dd512bf..c320cb6 100644
> --- a/include/qemu/qht.h
> +++ b/include/qemu/qht.h
> @@ -77,10 +77,11 @@ void qht_destroy(struct qht *ht);
>   * In case of successful operation, smp_wmb() is implied before the pointer 
> is
>   * inserted into the hash table.
>   *
> - * Returns true on success.
> - * Returns false if the @p-@hash pair already exists in the hash table.
> + * On success, returns NULL.
> + * On failure, returns the pointer from an entry that is equivalent (i.e.
> + * ht->cmp matches and the hash is the same) to @p-@h.
>   */
> -bool qht_insert(struct qht *ht, void *p, uint32_t hash);
> +void *qht_insert(struct qht *ht, void *p, uint32_t hash);

Hmm, this seems needlessly counter-intuitive. I realise the potential
efficiency in overloading success/fail but wouldn't a:

  bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing);

be conceptually nicer?

>
>  /**
>   * qht_lookup_custom - Look up a pointer using a custom comparison function.
> diff --git a/tests/qht-bench.c b/tests/qht-bench.c
> index c94ac25..2f88400 100644
> --- a/tests/qht-bench.c
> +++ b/tests/qht-bench.c
> @@ -163,7 +163,7 @@ static void do_rw(struct thread_info *info)
>  bool written = false;
>
>  if (qht_lookup(, p, hash) == NULL) {
> -written = qht_insert(, p, hash);
> +written = !qht_insert(, p, hash);
>  }
>  if (written) {
>  stats->in++;
> @@ -322,7 +322,7 @@ static void htable_init(void)
>  r = xorshift64star(r);
>  p = [r & (init_range - 1)];
>  hash = h(*p);
> -if (qht_insert(, p, hash)) {
> +if (qht_insert(, p, hash) == NULL) {
>  break;
>  }
>  retries++;
> diff --git a/tests/test-qht.c b/tests/test-qht.c
> index f8f2886..7164ae4 100644
> --- a/tests/test-qht.c
> +++ b/tests/test-qht.c
> @@ -27,11 +27,14 @@ static void insert(int a, int b)
>
>  for (i = a; i < b; i++) {
>  uint32_t hash;
> +void *existing;
>
>  arr[i] = i;
>  hash = i;
>
> -qht_insert(, [i], hash);
> +g_assert_true(!qht_insert(, [i], hash));
> +existing = qht_insert(, [i], hash);
> +g_assert_true(existing == [i]);
>  }
>  }
>
> diff --git a/util/qht.c b/util/qht.c
> index dcb3ee1..f9f49a9 100644
> --- a/util/qht.c
> +++ b/util/qht.c
> @@ -511,9 +511,9 @@ void *qht_lookup(struct qht *ht, const void *userp, 
> uint32_t hash)
>  }
>
>  /* call with head->lock held */
> -static bool qht_insert__locked(struct qht *ht, struct qht_map *map,
> -   struct qht_bucket *head, void *p, uint32_t 
> hash,
> -   bool *needs_resize)
> +static void *qht_insert__locked(struct qht *ht, struct qht_map *map,
> +struct qht_bucket *head, void *p, uint32_t 
> hash,
> +bool *needs_resize)
>  {
>  struct qht_bucket *b = head;
>  struct qht_bucket *prev = NULL;
> @@ -523,8 +523,9 @@ static bool qht_insert__locked(struct qht *ht, struct 
> qht_map *map,
>  do {
>  for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
>  if (b->pointers[i]) {
> -if (unlikely(b->pointers[i] == p)) {
> -return false;
> +if (unlikely(b->hashes[i] == hash &&
> + ht->cmp(b->pointers[i], p))) {
> +return b->pointers[i];
>  }
>  } else {
>  goto found;
> @@ -553,7 +554,7 @@ static bool qht_insert__locked(struct qht *ht, struct 
> qht_map *map,
>  atomic_set(>hashes[i], hash);
>  atomic_set(>pointers[i], p);
>  seqlock_write_end(>sequence);
> -return true;
> +return NULL;
>  }
>
>  static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
> @@ -577,12 +578,12 @@ static __attribute__((noinline)) void 
> qht_grow_maybe(struct qht *ht)
>  qemu_mutex_unlock(>lock);
>  }
>
> -bool qht_insert(struct qht *ht, void *p, uint32_t hash)
> +void *qht_insert(struct qht *ht, void *p, uint32_t hash)
>  {
>  struct qht_bucket *b;
>  struct qht_map *map;
>  bool needs_resize = false;
> -bool ret;
> +  

Re: [Qemu-devel] [PATCH 01/16] qht: require a default comparison function

2018-03-28 Thread Alex Bennée

Emilio G. Cota  writes:

> qht_lookup now uses the default cmp function. qht_lookup_custom is defined
> to retain the old behaviour, that is a cmp function is explicitly provided.
>
> qht_insert will gain use of the default cmp in the next patch.
>
> Signed-off-by: Emilio G. Cota 

Reviewed-by: Alex Bennée 

> ---
>  accel/tcg/cpu-exec.c  |  4 ++--
>  accel/tcg/translate-all.c | 16 +++-
>  include/qemu/qht.h| 23 +++
>  tests/qht-bench.c | 14 +++---
>  tests/test-qht.c  | 15 ++-
>  util/qht.c| 14 +++---
>  6 files changed, 64 insertions(+), 22 deletions(-)
>
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index 280200f..ec57564 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -293,7 +293,7 @@ struct tb_desc {
>  uint32_t trace_vcpu_dstate;
>  };
>
> -static bool tb_cmp(const void *p, const void *d)
> +static bool tb_lookup_cmp(const void *p, const void *d)
>  {
>  const TranslationBlock *tb = p;
>  const struct tb_desc *desc = d;
> @@ -338,7 +338,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, 
> target_ulong pc,
>  phys_pc = get_page_addr_code(desc.env, pc);
>  desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
>  h = tb_hash_func(phys_pc, pc, flags, cf_mask, *cpu->trace_dstate);
> -return qht_lookup(_ctx.htable, tb_cmp, , h);
> +return qht_lookup_custom(_ctx.htable, tb_lookup_cmp, , h);
>  }
>
>  void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 67795cd..1cf10f8 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -785,11 +785,25 @@ static inline void code_gen_alloc(size_t tb_size)
>  qemu_mutex_init(_ctx.tb_lock);
>  }
>
> +static bool tb_cmp(const void *ap, const void *bp)
> +{
> +const TranslationBlock *a = ap;
> +const TranslationBlock *b = bp;
> +
> +return a->pc == b->pc &&
> +a->cs_base == b->cs_base &&
> +a->flags == b->flags &&
> +(tb_cflags(a) & CF_HASH_MASK) == (tb_cflags(b) & CF_HASH_MASK) &&
> +a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
> +a->page_addr[0] == b->page_addr[0] &&
> +a->page_addr[1] == b->page_addr[1];
> +}
> +
>  static void tb_htable_init(void)
>  {
>  unsigned int mode = QHT_MODE_AUTO_RESIZE;
>
> -qht_init(_ctx.htable, CODE_GEN_HTABLE_SIZE, mode);
> +qht_init(_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
>  }
>
>  /* Must be called before using the QEMU cpus. 'tb_size' is the size
> diff --git a/include/qemu/qht.h b/include/qemu/qht.h
> index 531aa95..dd512bf 100644
> --- a/include/qemu/qht.h
> +++ b/include/qemu/qht.h
> @@ -11,8 +11,11 @@
>  #include "qemu/thread.h"
>  #include "qemu/qdist.h"
>
> +typedef bool (*qht_cmp_func_t)(const void *a, const void *b);
> +
>  struct qht {
>  struct qht_map *map;
> +qht_cmp_func_t cmp;
>  QemuMutex lock; /* serializes setters of ht->map */
>  unsigned int mode;
>  };
> @@ -47,10 +50,12 @@ typedef void (*qht_iter_func_t)(struct qht *ht, void *p, 
> uint32_t h, void *up);
>  /**
>   * qht_init - Initialize a QHT
>   * @ht: QHT to be initialized
> + * @cmp: default comparison function. Cannot be NULL.
>   * @n_elems: number of entries the hash table should be optimized for.
>   * @mode: bitmask with OR'ed QHT_MODE_*
>   */
> -void qht_init(struct qht *ht, size_t n_elems, unsigned int mode);
> +void qht_init(struct qht *ht, qht_cmp_func_t cmp, size_t n_elems,
> +  unsigned int mode);
>
>  /**
>   * qht_destroy - destroy a previously initialized QHT
> @@ -78,7 +83,7 @@ void qht_destroy(struct qht *ht);
>  bool qht_insert(struct qht *ht, void *p, uint32_t hash);
>
>  /**
> - * qht_lookup - Look up a pointer in a QHT
> + * qht_lookup_custom - Look up a pointer using a custom comparison function.
>   * @ht: QHT to be looked up
>   * @func: function to compare existing pointers against @userp
>   * @userp: pointer to pass to @func
> @@ -94,8 +99,18 @@ bool qht_insert(struct qht *ht, void *p, uint32_t hash);
>   * Returns the corresponding pointer when a match is found.
>   * Returns NULL otherwise.
>   */
> -void *qht_lookup(struct qht *ht, qht_lookup_func_t func, const void *userp,
> - uint32_t hash);
> +void *qht_lookup_custom(struct qht *ht, qht_lookup_func_t func,
> +const void *userp, uint32_t hash);
> +
> +/**
> + * qht_lookup - Look up a pointer in a QHT
> + * @ht: QHT to be looked up
> + * @userp: pointer to pass to @func
> + * @hash: hash of the pointer to be looked up
> + *
> + * Calls qht_lookup_custom() using @ht's default comparison function.
> + */
> +void *qht_lookup(struct qht *ht, const void *userp, uint32_t hash);
>
>  /**
>   * qht_remove - remove a pointer from the hash table
> diff --git a/tests/qht-bench.c b/tests/qht-bench.c
> 

Re: [Qemu-devel] [PATCH 13/16] translate-all: protect TB jumps with a per-destination-TB lock

2018-03-28 Thread Alex Bennée

Emilio G. Cota  writes:

> This applies to both user-mode and !user-mode emulation.
>

> @@ -2124,7 +2148,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
>  /* Adjust the execution state of the next TB.  */
>  cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n;
>
> -if (tb->cflags & CF_NOCACHE) {
> +if (tb_cflags(tb) & CF_NOCACHE) {
>  if (tb->orig_tb) {
>  /* Invalidate original TB if this TB was generated in
>   * cpu_exec_nocache() */

Heads up, this fails to apply on master, most likely due to
87f963be66a32453e001d1052b000f1653605caa

--
Alex Bennée



Re: [Qemu-devel] [PATCH 0/2] vhost-user: Back SET/GET_CONFIG with a protocol feature

2018-03-28 Thread Maxime Coquelin



On 03/28/2018 05:56 PM, Maxime Coquelin wrote:

Hi,

While reviewing DPDK series adding support to VHOST_USER_SET_CONFIG
and VHOST_USER_GET_CONFIG request, I found that it was not backed
with a dedicated protocol feature.

This series addresses this by adding a new protocol feature bit,
and by only negotiating it if the device supports it, as suggested
by Michael. Indeed, if the feature is supported by other types of
devices in the future, it would confuse the backends, as they couldn't
know whether the device really supports it or not.

To know whether the vhost device supports the config feature, the trick
is to check whether it implements the config_ops. That's the
reason why the first patch moves setting the config ops in
vhost-user-blk before calling vhost_user_init().

The series targets v2.12 release, else we may have to disable these
requests in this release.

*NOTE*: The series has only been tested as I don't have the


s/tested/build tested/


environment to try it. Changpeng, can you please test it?

Thanks,
Maxime

Maxime Coquelin (2):
   vhost-user-blk: set config ops before vhost-user init
   vhost-user: back SET/GET_CONFIG requests with a protocol feature

  docs/interop/vhost-user.txt | 21 -
  hw/block/vhost-user-blk.c   |  4 ++--
  hw/virtio/vhost-user.c  | 17 +
  3 files changed, 31 insertions(+), 11 deletions(-)





[Qemu-devel] [PATCH 1/2] vhost-user-blk: set config ops before vhost-user init

2018-03-28 Thread Maxime Coquelin
As soon as vhost-user init is done, the backend may send
VHOST_USER_SLAVE_CONFIG_CHANGE_MSG, so let's set the
notification callback before it.

Also, it will be used to know whether the device supports
the config feature to advertise it or not.

Signed-off-by: Maxime Coquelin 
---
 hw/block/vhost-user-blk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index f840f07dfe..262baca432 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -259,6 +259,8 @@ static void vhost_user_blk_device_realize(DeviceState *dev, 
Error **errp)
 s->dev.vq_index = 0;
 s->dev.backend_features = 0;
 
+vhost_dev_set_config_notifier(>dev, _ops);
+
 ret = vhost_dev_init(>dev, >chardev, VHOST_BACKEND_TYPE_USER, 0);
 if (ret < 0) {
 error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
@@ -277,8 +279,6 @@ static void vhost_user_blk_device_realize(DeviceState *dev, 
Error **errp)
 s->blkcfg.num_queues = s->num_queues;
 }
 
-vhost_dev_set_config_notifier(>dev, _ops);
-
 return;
 
 vhost_err:
-- 
2.14.3




[Qemu-devel] [PATCH 2/2] vhost-user: back SET/GET_CONFIG requests with a protocol feature

2018-03-28 Thread Maxime Coquelin
Without a dedicated protocol feature, QEMU cannot know whether
the backend can handle VHOST_USER_SET_CONFIG and
VHOST_USER_GET_CONFIG messages.

This patch adds a protocol feature that is only advertised by
QEMU if the device implements the config ops. The backend
should only send VHOST_USER_SLAVE_CONFIG_CHANGE_MSG requests
if the protocol feature has been negotiated.

Signed-off-by: Maxime Coquelin 
---
 docs/interop/vhost-user.txt | 21 -
 hw/virtio/vhost-user.c  | 17 +
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
index c058c407df..534caab18a 100644
--- a/docs/interop/vhost-user.txt
+++ b/docs/interop/vhost-user.txt
@@ -379,6 +379,7 @@ Protocol features
 #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN   6
 #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
 #define VHOST_USER_PROTOCOL_F_PAGEFAULT  8
+#define VHOST_USER_PROTOCOL_F_CONFIG 9
 
 Master message types
 
@@ -664,7 +665,8 @@ Master message types
   Master payload: virtio device config space
   Slave payload: virtio device config space
 
-  Submitted by the vhost-user master to fetch the contents of the virtio
+  When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, this message is
+  submitted by the vhost-user master to fetch the contents of the virtio
   device configuration space, vhost-user slave's payload size MUST match
   master's request, vhost-user slave uses zero length of payload to
   indicate an error to vhost-user master. The vhost-user master may
@@ -677,7 +679,8 @@ Master message types
   Master payload: virtio device config space
   Slave payload: N/A
 
-  Submitted by the vhost-user master when the Guest changes the virtio
+  When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, this message is
+  submitted by the vhost-user master when the Guest changes the virtio
   device configuration space and also can be used for live migration
   on the destination host. The vhost-user slave must check the flags
   field, and slaves MUST NOT accept SET_CONFIG for read-only
@@ -766,13 +769,13 @@ Slave message types
  Slave payload: N/A
  Master payload: N/A
 
- Vhost-user slave sends such messages to notify that the virtio device's
- configuration space has changed, for those host devices which can support
- such feature, host driver can send VHOST_USER_GET_CONFIG message to slave
- to get the latest content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is
- negotiated, and slave set the VHOST_USER_NEED_REPLY flag, master must
- respond with zero when operation is successfully completed, or non-zero
- otherwise.
+ When VHOST_USER_PROTOCOL_F_CONFIG is negotiated, vhost-user slave sends
+ such messages to notify that the virtio device's configuration space has
+ changed, for those host devices which can support such feature, host
+ driver can send VHOST_USER_GET_CONFIG message to slave to get the latest
+ content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, and slave set
+ the VHOST_USER_NEED_REPLY flag, master must respond with zero when
+ operation is successfully completed, or non-zero otherwise.
 
 VHOST_USER_PROTOCOL_F_REPLY_ACK:
 ---
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 44aea5c0a8..a045203b26 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -46,6 +46,7 @@ enum VhostUserProtocolFeature {
 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
+VHOST_USER_PROTOCOL_F_CONFIG = 9,
 VHOST_USER_PROTOCOL_F_MAX
 };
 
@@ -1211,6 +1212,12 @@ static int vhost_user_init(struct vhost_dev *dev, void 
*opaque)
 
 dev->protocol_features =
 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
+
+if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
+/* Dont acknowledge CONFIG feature if device doesn't support it */
+dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+}
+
 err = vhost_user_set_protocol_features(dev, dev->protocol_features);
 if (err < 0) {
 return err;
@@ -1405,6 +1412,11 @@ static int vhost_user_get_config(struct vhost_dev *dev, 
uint8_t *config,
 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
 };
 
+if (!virtio_has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_CONFIG)) {
+return -1;
+}
+
 if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
 return -1;
 }
@@ -1448,6 +1460,11 @@ static int vhost_user_set_config(struct vhost_dev *dev, 
const uint8_t *data,
 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
 };
 
+if (!virtio_has_feature(dev->protocol_features,
+

[Qemu-devel] [PATCH 0/2] vhost-user: Back SET/GET_CONFIG with a protocol feature

2018-03-28 Thread Maxime Coquelin
Hi,

While reviewing DPDK series adding support to VHOST_USER_SET_CONFIG
and VHOST_USER_GET_CONFIG request, I found that it was not backed
with a dedicated protocol feature.

This series addresses this by adding a new protocol feature bit,
and by only negotiating it if the device supports it, as suggested
by Michael. Indeed, if the feature is supported by other types of
devices in the future, it would confuse the backends, as they couldn't
know whether the device really supports it or not.

To know whether the vhost device supports the config feature, the trick
is to check whether it implements the config_ops. That's the
reason why the first patch moves setting the config ops in
vhost-user-blk before calling vhost_user_init().

The series targets v2.12 release, else we may have to disable these
requests in this release.

*NOTE*: The series has only been build-tested, as I don't have the
environment to try it. Changpeng, can you please test it?

Thanks,
Maxime

Maxime Coquelin (2):
  vhost-user-blk: set config ops before vhost-user init
  vhost-user: back SET/GET_CONFIG requests with a protocol feature

 docs/interop/vhost-user.txt | 21 -
 hw/block/vhost-user-blk.c   |  4 ++--
 hw/virtio/vhost-user.c  | 17 +
 3 files changed, 31 insertions(+), 11 deletions(-)

-- 
2.14.3




Re: [Qemu-devel] [PATCH v5.2 for 2.13 4/4] tpm: Add test cases that uses the external swtpm with CRB interface

2018-03-28 Thread Marc-André Lureau
Hi

On Sat, Mar 17, 2018 at 3:24 AM, Stefan Berger
 wrote:
> Add a test program for testing the CRB with the external swtpm.
>
> The 1st test case extends a PCR and reads back the value and compares
> it against an expected return packet.
>
> The 2nd test case repeats the 1st test case and then migrates the
> external swtpm's state along with the VM state to a destination
> QEMU and swtpm and checks that the PCR has the expected value now.
>
> Signed-off-by: Stefan Berger 

Nice, with the below diff applied:
Reviewed-by: Marc-André Lureau 



diff --git a/tests/tpm-crb-swtpm-test.c b/tests/tpm-crb-swtpm-test.c
index b2f6068b50..505a927f4c 100644
--- a/tests/tpm-crb-swtpm-test.c
+++ b/tests/tpm-crb-swtpm-test.c
@@ -114,6 +114,7 @@ static void migration_start_qemu(QTestState
**src_qemu, QTestState **dst_qemu,
 free(src_qemu_args);
 free(dst_qemu_args);
 }
+
 static void tpm_crb_swtpm_test(const void *data)
 {
 char *args = NULL;
@@ -137,6 +138,7 @@ static void tpm_crb_swtpm_test(const void *data)
 addr->u.q_unix.path);

 s = qtest_start(args);
+g_free(args);

 tpm_util_startup(s, tpm_util_crb_transfer);
 tpm_util_pcrextend(s, tpm_util_crb_transfer);
@@ -239,6 +241,7 @@ int main(int argc, char **argv)
 g_free(ts.dst_tpm_path);
 g_rmdir(ts.src_tpm_path);
 g_free(ts.src_tpm_path);
+g_free(ts.uri);

 return ret;
 }

> ---
>  tests/Makefile.include |   3 +
>  tests/tpm-crb-swtpm-test.c | 244 
> +
>  tests/tpm-util.c   | 143 ++
>  tests/tpm-util.h   |  36 +++
>  4 files changed, 426 insertions(+)
>  create mode 100644 tests/tpm-crb-swtpm-test.c
>  create mode 100644 tests/tpm-util.c
>  create mode 100644 tests/tpm-util.h
>
> diff --git a/tests/Makefile.include b/tests/Makefile.include
> index 42fd426..bd4f56f 100644
> --- a/tests/Makefile.include
> +++ b/tests/Makefile.include
> @@ -297,6 +297,7 @@ check-qtest-i386-$(CONFIG_VHOST_USER_NET_TEST_i386) += 
> tests/vhost-user-test$(EX
>  ifeq ($(CONFIG_VHOST_USER_NET_TEST_i386),)
>  check-qtest-x86_64-$(CONFIG_VHOST_USER_NET_TEST_x86_64) += 
> tests/vhost-user-test$(EXESUF)
>  endif
> +check-qtest-i386-$(CONFIG_TPM) += tests/tpm-crb-swtpm-test$(EXESUF)
>  check-qtest-i386-$(CONFIG_TPM) += tests/tpm-crb-test$(EXESUF)
>  check-qtest-i386-$(CONFIG_TPM) += tests/tpm-tis-test$(EXESUF)
>  check-qtest-i386-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF)
> @@ -719,6 +720,8 @@ tests/test-util-sockets$(EXESUF): 
> tests/test-util-sockets.o \
>  tests/test-io-task$(EXESUF): tests/test-io-task.o $(test-io-obj-y)
>  tests/test-io-channel-socket$(EXESUF): tests/test-io-channel-socket.o \
>  tests/io-channel-helpers.o tests/socket-helpers.o $(test-io-obj-y)
> +tests/tpm-crb-swtpm-test$(EXESUF): tests/tpm-crb-swtpm-test.o 
> tests/tpm-emu.o \
> +   tests/tpm-util.o $(test-io-obj-y)
>  tests/tpm-crb-test$(EXESUF): tests/tpm-crb-test.o tests/tpm-emu.o 
> $(test-io-obj-y)
>  tests/tpm-tis-test$(EXESUF): tests/tpm-tis-test.o tests/tpm-emu.o 
> $(test-io-obj-y)
>  tests/test-io-channel-file$(EXESUF): tests/test-io-channel-file.o \
> diff --git a/tests/tpm-crb-swtpm-test.c b/tests/tpm-crb-swtpm-test.c
> new file mode 100644
> index 000..b2f6068
> --- /dev/null
> +++ b/tests/tpm-crb-swtpm-test.c
> @@ -0,0 +1,244 @@
> +/*
> + * QTest testcase for TPM CRB talking to external swtpm and swtpm migration
> + *
> + * Copyright (c) 2018 IBM Corporation
> + *  with parts borrowed from migration-test.c that is:
> + * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates
> + *
> + * Authors:
> + *   Stefan Berger 
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include 
> +
> +#include "hw/acpi/tpm.h"
> +#include "io/channel-socket.h"
> +#include "libqtest.h"
> +#include "tpm-util.h"
> +#include "sysemu/tpm.h"
> +#include "qapi/qmp/qdict.h"
> +
> +typedef struct TestState {
> +char *src_tpm_path;
> +char *dst_tpm_path;
> +char *uri;
> +} TestState;
> +
> +bool got_stop;
> +
> +static void migrate(QTestState *who, const char *uri)
> +{
> +QDict *rsp;
> +gchar *cmd;
> +
> +cmd = g_strdup_printf("{ 'execute': 'migrate',"
> +  "'arguments': { 'uri': '%s' } }",
> +  uri);
> +rsp = qtest_qmp(who, cmd);
> +g_free(cmd);
> +g_assert(qdict_haskey(rsp, "return"));
> +QDECREF(rsp);
> +}
> +
> +/*
> + * Events can get in the way of responses we are actually waiting for.
> + */
> +static QDict *wait_command(QTestState *who, const char *command)
> +{
> +const char *event_string;
> +QDict *response;
> +
> +response = qtest_qmp(who, command);
> +
> +while (qdict_haskey(response, "event")) {
> +   

Re: [Qemu-devel] [PATCH v5 for 2.13 3/4] docs: tpm: add VM save/restore example and troubleshooting guide

2018-03-28 Thread Marc-André Lureau
Hi

On Fri, Mar 16, 2018 at 10:46 PM, Stefan Berger
 wrote:
> Extend the docs related to TPM with specs related to VM save and
> restore and a troubleshooting guide for TPM migration.
>

Thanks a lot for writing this! Some questions below.

> Signed-off-by: Stefan Berger 
> ---
>  docs/specs/tpm.txt | 106 
> +
>  1 file changed, 106 insertions(+)
>
> diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt
> index d1d7157..c230c4c 100644
> --- a/docs/specs/tpm.txt
> +++ b/docs/specs/tpm.txt
> @@ -200,3 +200,109 @@ crw---. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0
>  PCR-00: 35 4E 3B CE 23 9F 38 59 ...
>  ...
>  PCR-23: 00 00 00 00 00 00 00 00 ...
> +
> +
> +=== Migration with the TPM emulator ===
> +
> +The TPM emulator supports the following types of virtual machine migration:
> +
> +- VM save / restore (migration into a file)
> +- Network migration
> +- Snapshotting (migration into storage like QoW2 or QED)
> +
> +The following command sequences can be used to test VM save / restore.
> +
> +
> +In a 1st terminal start an instance of a swtpm using the following command:
> +
> +mkdir /tmp/mytpm1
> +swtpm socket --tpmstate dir=/tmp/mytpm1 \
> +  --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \
> +  --log level=20 --tpm2
> +
> +In a 2nd terminal start the VM:
> +
> +qemu-system-x86_64 -display sdl -enable-kvm \
> +  -m 1024 -boot d -bios bios-256k.bin -boot menu=on \
> +  -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \
> +  -tpmdev emulator,id=tpm0,chardev=chrtpm \
> +  -device tpm-tis,tpmdev=tpm0 \
> +  -monitor stdio \
> +  test.img
> +
> +Verify that the attached TPM is working as expected using applications inside
> +the VM.
> +
> +To store the state of the VM use the following command in the QEMU monitor in
> +the 2nd terminal:
> +
> +(qemu) migrate "exec:cat > testvm.bin"
> +(qemu) quit
> +
> +At this point a file called 'testvm.bin' should exists and the swtpm and QEMU
> +processes should have ended.

When does swtpm end -- when QEMU exits? Hopefully you can do several
migrate commands.

> +
> +To test 'VM restore' you have to start the swtpm with the same parameters
> +as before. If previously a TPM 2 [--tpm2] was saved, --tpm2 must now be
> +passed again on the command line.
> +
> +In the 1st terminal restart the swtpm with the same command line as before:
> +
> +swtpm socket --tpmstate dir=/tmp/mytpm1 \
> +  --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \
> +  --log level=20 --tpm2

Does that mean the tpmstate directory content must be the same and
thus migrated as well? Can it be empty in the destination? If not,
what should be done to initialize it? Could it be empty instead?

> +
> +In the 2nd terminal restore the state of the VM using the additonal
> +'-incoming' option.
> +
> +qemu-system-x86_64 -display sdl -enable-kvm \
> +  -m 1024 -boot d -bios bios-256k.bin -boot menu=on \
> +  -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \
> +  -tpmdev emulator,id=tpm0,chardev=chrtpm \
> +  -device tpm-tis,tpmdev=tpm0 \
> +  -incoming "exec:cat < testvm.bin" \
> +  test.img
> +
> +
> +Troubleshooting migration:
> +
> +There are several reasons why migration may fail. In case of problems,
> +please ensure that the command lines adhere to the following rules and,
> +if possible, that identical versions of QEMU and swtpm are used at all
> +times.
> +
> +VM save and restore:
> + - QEMU command line parameters should be identical apart from the
> +   '-incoming' option on VM restore
> + - swtpm command line parameters should be identical
> +
> +VM migration to 'localhost':
> + - QEMU command line parameters should be identical apart from the
> +   '-incoming' option on the destination side
> + - swtpm command line parameters should point to two different
> +   directories on the source and destination swtpm (--tpmstate dir=...)
> +   (especially if different versions of libtpms were to be used on the
> +   same machine).
> +
> +VM migration across the network:
> + - QEMU command line parameters should be identical apart from the
> +   '-incoming' option on the destination side
> + - swtpm command line parameters should be identical
> +
> +VM Snapshotting:
> + - QEMU command line parameters should be identical
> + - swtpm command line parameters should be identical
> +
> +
> +Besides that, migration failure reasons on the swtpm level may include
> +the following:
> +
> + - the versions of the swtpm on the source and destination sides are
> +   incompatible
> +   - downgrading of TPM state may not be supported
> +   - the source and destination libtpms were compiled with different
> + compile-time options and the destination side refuses to accept the
> + state
> + - different migration keys are used on the source and destination side
> +   and the destination side cannot decrypt the migrated state
> +   (swtpm ... --migration-key ... )
> --
> 2.5.5
>




-- 
Marc-André Lureau



Re: [Qemu-devel] [PATCH v4 2/2] tpm: extend TPM TIS with state migration support

2018-03-28 Thread Marc-André Lureau
Hi

On Thu, Mar 1, 2018 at 8:59 PM, Stefan Berger
 wrote:
> Extend the TPM TIS interface with state migration support.
>
> We need to synchronize with the backend thread to make sure that a command
> being processed by the external TPM emulator has completed and its
> response been received.
>
> Signed-off-by: Stefan Berger 
> ---
>  hw/tpm/tpm_tis.c | 54 +-
>  1 file changed, 53 insertions(+), 1 deletion(-)
>
> diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c
> index 834eef7..5016d28 100644
> --- a/hw/tpm/tpm_tis.c
> +++ b/hw/tpm/tpm_tis.c
> @@ -902,9 +902,61 @@ static void tpm_tis_reset(DeviceState *dev)
>  tpm_backend_startup_tpm(s->be_driver, s->be_buffer_size);
>  }
>
> +/* persistent state handling */
> +
> +static int tpm_tis_pre_save(void *opaque)
> +{
> +TPMState *s = opaque;
> +uint8_t locty = s->active_locty;
> +
> +DPRINTF("tpm_tis: suspend: locty = %d : rw_offset = %u\n",
> +locty, s->rw_offset);
> +#ifdef DEBUG_TIS
> +tpm_tis_dump_state(opaque, 0);
> +#endif
> +
> +/*
> + * Synchronize with backend completion.
> + */
> +tpm_backend_finish_sync(s->be_driver);
> +
> +return 0;
> +}
> +
> +static const VMStateDescription vmstate_locty = {
> +.name = "loc",
> +.version_id = 1,
> +.minimum_version_id = 0,
> +.minimum_version_id_old = 0,

I don't understand what problem there is with leaving all the version
fields at 0, just like CRB.

> +.fields  = (VMStateField[]) {
> +VMSTATE_UINT32(state, TPMLocality),
> +VMSTATE_UINT32(inte, TPMLocality),
> +VMSTATE_UINT32(ints, TPMLocality),
> +VMSTATE_UINT8(access, TPMLocality),
> +VMSTATE_UINT32(sts, TPMLocality),
> +VMSTATE_UINT32(iface_id, TPMLocality),
> +VMSTATE_END_OF_LIST(),
> +}
> +};
> +
>  static const VMStateDescription vmstate_tpm_tis = {
>  .name = "tpm",
> -.unmigratable = 1,
> +.version_id = 1,
> +.minimum_version_id = 0,
> +.minimum_version_id_old = 0,

same

If you remove the version fields: Reviewed-by: Marc-André Lureau




> +.pre_save  = tpm_tis_pre_save,
> +.fields = (VMStateField[]) {
> +VMSTATE_BUFFER(buffer, TPMState),
> +VMSTATE_UINT16(rw_offset, TPMState),
> +VMSTATE_UINT8(active_locty, TPMState),
> +VMSTATE_UINT8(aborting_locty, TPMState),
> +VMSTATE_UINT8(next_locty, TPMState),
> +
> +VMSTATE_STRUCT_ARRAY(loc, TPMState, TPM_TIS_NUM_LOCALITIES, 1,
> + vmstate_locty, TPMLocality),
> +
> +VMSTATE_END_OF_LIST()
> +}
>  };
>
>  static Property tpm_tis_properties[] = {
> --
> 2.5.5
>



Re: [Qemu-devel] [PULL for-2.12 0/1] tcg mul[us]h fix

2018-03-28 Thread Peter Maydell
On 28 March 2018 at 05:51, Richard Henderson
<richard.hender...@linaro.org> wrote:
> This is material for stable as well.
>
>
> r~
>
>
> The following changes since commit fa3704d87720d7049d483ff669b9e2ff991e7658:
>
>   Update version for v2.12.0-rc1 release (2018-03-27 22:04:23 +0100)
>
> are available in the Git repository at:
>
>   git://github.com/rth7680/qemu.git tags/pull-tcg-20180328
>
> for you to fetch changes up to f2f1dde75160cac6ede330f3db50dc817d01a2d6:
>
>   tcg: Mark muluh_i64 and mulsh_i64 as 64-bit ops (2018-03-28 12:45:16 +0800)
>
> 
> Fix muluh_i64 and mulsh_i64 flags
>
> 
> Richard Henderson (1):
>   tcg: Mark muluh_i64 and mulsh_i64 as 64-bit ops

Applied, thanks.

-- PMM



Re: [Qemu-devel] [PATCH v1 02/14] tests: add fp-test, a floating point test suite

2018-03-28 Thread Emilio G. Cota
On Wed, Mar 28, 2018 at 10:51:30 +0100, Alex Bennée wrote:
> Emilio G. Cota  writes:
> >> So this is a unit test of our code rather than a test program running
> >> under QEMU?
> >
> > Having the -t host/soft flags allows you flexibility in what to test.
> >
> > With "host" mode, you're generating a binary that knows nothing
> > about QEMU, i.e. all its FP operations are native. You can use this
> > to (1) figure out whether your host diverts from the model [hopefully
> > it doesn't in anything substantial], and (2) test whether QEMU mimics
> > the corresponding host by running the binary under *-linux-user.
> 
> OK - there is no reason why we can't cross compile the single source
> file for multiple tests/tcg/targets. I'll look at that when I have
> another run at the cross compile stuff.

In fact I tried this first. I generated one executable per target,
compiling softfloat.o with the corresponding -DTARGET_FOO.

Then I realised this isn't good enough. The problem is that this
only buys you the target-specific part of softfloat, which boils
down to sNaN representation and little else. You still have to
pass your own float_status (with rounding etc.), and decide whether
to act or to ignore whatever flags softfloat sets after an op.

In short, having this without having the actual target code doesn't
buy you much.

I think a better alternative is to:
1- Compile fp-test using host mode on actual hardware. Let's call
   this executable fp-test-host-$arch.
2- Compare fp-test results of running on actual hardware against
   running fp-test-host-$arch on linux-user-$arch.

This will give us complete coverage of both softfloat and
the target code that calls softfloat (and raises exceptions etc.).

> >> If so we really should be building this automatically in make check.
> >
> > Yes, passing -soft mode would certainly be valuable and trivial
> > to integrate since there is nothing built that is target-dependent.
> 
> So the two bugs I'm currently fixing are guest dependent so can't be
> caught by soft mode:
> 
>   - ARM FP16 alternative format behaviour
>   - round_to_int_and_pack refactor broke TriCore ftoi insns (1759264)

Yes, passing soft mode is a necessary condition for correctness but
it isn't sufficient--arch-specific bugs can also happen! And those
bugs might be in softfloat and/or in target/$arch/*.

For that we'll need something like what I sketched above.

> I assume your bug was?
> 
>   - fix {min,max}nummag for same-abs-value inputs (from your series)

Yep.

Thanks,

E.




Re: [Qemu-devel] [PATCH v2 0/6] Extend vhost-user to support VFIO based accelerators

2018-03-28 Thread Michael S. Tsirkin
On Wed, Mar 28, 2018 at 08:24:07PM +0800, Tiwei Bie wrote:
> > > Update notes
> > > 
> > > 
> > > IOMMU feature bit check is removed in this version, because:
> > > 
> > > The IOMMU feature is negotiable, when an accelerator is used and
> > > it doesn't support virtual IOMMU, its driver just won't provide
> > > this feature bit when vhost library querying its features. And if
> > > it supports the virtual IOMMU, its driver can provide this feature
> > > bit. It's not reasonable to add this limitation in this patch set.
> > 
> > Fair enough. Still:
> > Can hardware on intel platforms actually support IOTLB requests?
> > Don't you need to add support for vIOMMU shadowing instead?
> > 
> 
> For the hardware I have, I guess they can't for now.

So VFIO in QEMU has support for vIOMMU shadowing.
Can you use that somehow?

Ability to run dpdk within guest seems important.

-- 
MST



Re: [Qemu-devel] [PATCH for 2.12 1/1] block: allow recursive calling of bdrv_set_aio_context

2018-03-28 Thread Denis V. Lunev
On 03/28/2018 06:31 PM, Max Reitz wrote:
> On 2018-03-27 15:30, Denis V. Lunev wrote:
>> We have received the following assert on QEMU 2.9:
>>
>> (gdb) bt
>> 0  0x7f6f67d281f7 in __GI_raise ()
>> 1  0x7f6f67d298e8 in __GI_abort ()
>> 2  0x7f6f67d21266 in __assert_fail_base ()
>> 3  0x7f6f67d21312 in __GI___assert_fail ()
>> 4  0x55a8faf76f9f in bdrv_detach_aio_context ()
>> 5  0x55a8faf76f68 in bdrv_detach_aio_context ()
>> 6  0x55a8faf770c6 in bdrv_set_aio_context ()
>> 7  0x55a8fafb780d in blk_set_aio_context ()
>> 8  0x55a8faf7af08 in block_job_attached_aio_context ()
>> 9  0x55a8faf77043 in bdrv_attach_aio_context ()
>> 10 0x55a8faf770d9 in bdrv_set_aio_context ()
>> 11 0x55a8fafb780d in blk_set_aio_context ()
>> 12 0x55a8fad580e7 in virtio_blk_data_plane_stop ()
>> 13 0x55a8faf11da5 in virtio_bus_stop_ioeventfd ()
>> 14 0x55a8fad85604 in virtio_vmstate_change ()
>> 15 0x55a8fae1ba52 in vm_state_notify ()
>> 16 0x55a8fad273e5 in do_vm_stop ()
>> 17 vm_stop ()
>> 18 0x55a8face8f28 in main_loop_should_exit ()
>> 19 main_loop ()
>> 20 main ()
>> (gdb)
>>
>> It does not look, that the code is fundumentally different in 2.12.
>>
>> block_job_attached_aio_context() calls backup_attached_aio_context(),
>> which in turn calls bdrv_detach_aio_context() again. This results in
>> assert(!bs->walking_aio_notifiers).
>>
>> The code in mirror is basically the same. The patch replaces boolean
>> condition with incremental counter, which should solve the problem.
>>
>> Signed-off-by: Denis V. Lunev 
>> CC: Kevin Wolf 
>> CC: Max Reitz 
>> ---
>>  include/block/block_int.h |  2 +-
>>  block.c   | 12 ++--
>>  2 files changed, 7 insertions(+), 7 deletions(-)
> Changing this to a counter looks OK to me, but dealing with a recursive
> bdrv_set_aio_context() might be a bit more complicated than that.  It
> calls for trouble if one of the aio_notifiers assigns a different
> AioContext than was just assigned, if nothing else then because we have
> to make sure not to call any other notifier with the old "new" context.
>
> In this case, that shouldn't be an issue because we will simply assign
> the new context again, so it's actually a no-op.  Maybe we could allow
> that case alone, but even then we have to verify it.
>
> Another thing I'm wondering is how this prevents infinite recursion.
> Won't we just call the notifier again which will try to again set its
> BlockBackend's AioContext?
Good catch, that is something to think about. The AIO context here was
different from the one on top. I'll come back after more evaluation.

> What is the full case anyway?  The AioContext of the source changes,
> which results in the call of backup's notifier, which will then change
> the AioContext of the target.  So source and target would need to be in
> the same chain if setting target's context conflicts with a context
> change on source.
Unfortunately I do not know :( This happened at a customer
site. The only thing we have is a core dump, which gives the
above-mentioned trace.

Den

> Max
>
>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>> index 29cafa4..a290711 100644
>> --- a/include/block/block_int.h
>> +++ b/include/block/block_int.h
>> @@ -613,7 +613,7 @@ struct BlockDriverState {
>>   * BDS may register themselves in this list to be notified of changes
>>   * regarding this BDS's context */
>>  QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
>> -bool walking_aio_notifiers; /* to make removal during iteration safe */
>> +int walking_aio_notifiers; /* to make removal during iteration safe */
>>  
>>  char filename[PATH_MAX];
>>  char backing_file[PATH_MAX]; /* if non zero, the image is a diff of
>> diff --git a/block.c b/block.c
>> index a8da4f2..82cc07b 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -4739,8 +4739,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
>>  return;
>>  }
>>  
>> -assert(!bs->walking_aio_notifiers);
>> -bs->walking_aio_notifiers = true;
>> +bs->walking_aio_notifiers++;
>>  QLIST_FOREACH_SAFE(baf, >aio_notifiers, list, baf_tmp) {
>>  if (baf->deleted) {
>>  bdrv_do_remove_aio_context_notifier(baf);
>> @@ -4751,7 +4750,8 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
>>  /* Never mind iterating again to check for ->deleted.  bdrv_close() will
>>   * remove remaining aio notifiers if we aren't called again.
>>   */
>> -bs->walking_aio_notifiers = false;
>> +bs->walking_aio_notifiers--;
>> +assert(bs->walking_aio_notifiers >= 0);
>>  
>>  if (bs->drv->bdrv_detach_aio_context) {
>>  bs->drv->bdrv_detach_aio_context(bs);
>> @@ -4782,8 +4782,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
>>  bs->drv->bdrv_attach_aio_context(bs, new_context);
>>  }
>>  
>> -assert(!bs->walking_aio_notifiers);
>> -

[Qemu-devel] [PATCH for-2.12 v2 1/2] i386/hyperv: add hv-frequencies cpu property

2018-03-28 Thread Roman Kagan
In order to guarantee compatibility on migration, QEMU should have
complete control over the features it announces to the guest via CPUID.

However, the availability of Hyper-V frequency MSRs
(HV_X64_MSR_TSC_FREQUENCY and HV_X64_MSR_APIC_FREQUENCY) depends solely
on the support for them in the underlying KVM.

Introduce "hv-frequencies" cpu property (off by default) which gives
QEMU full control over whether these MSRs are announced.

While at it, drop the redundant check of the cpu tsc frequency, and
decouple this feature from hv-time.

Signed-off-by: Roman Kagan 
---
v1 -> v2:
 - indicate what flag requested the feature that can't be enabled in the
   error message

 target/i386/cpu.h |  1 +
 target/i386/cpu.c |  1 +
 target/i386/kvm.c | 13 +
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 78db1b833a..1b219fafc4 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1296,6 +1296,7 @@ struct X86CPU {
 bool hyperv_runtime;
 bool hyperv_synic;
 bool hyperv_stimer;
+bool hyperv_frequencies;
 bool check_cpuid;
 bool enforce_cpuid;
 bool expose_kvm;
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 555ae79d29..1a6b082b6f 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4761,6 +4761,7 @@ static Property x86_cpu_properties[] = {
 DEFINE_PROP_BOOL("hv-runtime", X86CPU, hyperv_runtime, false),
 DEFINE_PROP_BOOL("hv-synic", X86CPU, hyperv_synic, false),
 DEFINE_PROP_BOOL("hv-stimer", X86CPU, hyperv_stimer, false),
+DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false),
 DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
 DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
 DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index d23fff12f5..b35623ae24 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -648,11 +648,16 @@ static int hyperv_handle_properties(CPUState *cs)
 env->features[FEAT_HYPERV_EAX] |= HV_HYPERCALL_AVAILABLE;
 env->features[FEAT_HYPERV_EAX] |= HV_TIME_REF_COUNT_AVAILABLE;
 env->features[FEAT_HYPERV_EAX] |= HV_REFERENCE_TSC_AVAILABLE;
-
-if (has_msr_hv_frequencies && tsc_is_stable_and_known(env)) {
-env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS;
-env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE;
+}
+if (cpu->hyperv_frequencies) {
+if (!has_msr_hv_frequencies) {
+fprintf(stderr, "Hyper-V frequency MSRs "
+"(requested by 'hv-frequencies' cpu flag) "
+"are not supported by kernel\n");
+return -ENOSYS;
 }
+env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS;
+env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE;
 }
 if (cpu->hyperv_crash && has_msr_hv_crash) {
 env->features[FEAT_HYPERV_EDX] |= HV_GUEST_CRASH_MSR_AVAILABLE;
-- 
2.14.3




Re: [Qemu-devel] [PATCH for 2.12 1/1] block: allow recursive calling of bdrv_set_aio_context

2018-03-28 Thread Max Reitz
On 2018-03-27 15:30, Denis V. Lunev wrote:
> We have received the following assert on QEMU 2.9:
> 
> (gdb) bt
> 0  0x7f6f67d281f7 in __GI_raise ()
> 1  0x7f6f67d298e8 in __GI_abort ()
> 2  0x7f6f67d21266 in __assert_fail_base ()
> 3  0x7f6f67d21312 in __GI___assert_fail ()
> 4  0x55a8faf76f9f in bdrv_detach_aio_context ()
> 5  0x55a8faf76f68 in bdrv_detach_aio_context ()
> 6  0x55a8faf770c6 in bdrv_set_aio_context ()
> 7  0x55a8fafb780d in blk_set_aio_context ()
> 8  0x55a8faf7af08 in block_job_attached_aio_context ()
> 9  0x55a8faf77043 in bdrv_attach_aio_context ()
> 10 0x55a8faf770d9 in bdrv_set_aio_context ()
> 11 0x55a8fafb780d in blk_set_aio_context ()
> 12 0x55a8fad580e7 in virtio_blk_data_plane_stop ()
> 13 0x55a8faf11da5 in virtio_bus_stop_ioeventfd ()
> 14 0x55a8fad85604 in virtio_vmstate_change ()
> 15 0x55a8fae1ba52 in vm_state_notify ()
> 16 0x55a8fad273e5 in do_vm_stop ()
> 17 vm_stop ()
> 18 0x55a8face8f28 in main_loop_should_exit ()
> 19 main_loop ()
> 20 main ()
> (gdb)
> 
> It does not look, that the code is fundumentally different in 2.12.
> 
> block_job_attached_aio_context() calls backup_attached_aio_context(),
> which in turn calls bdrv_detach_aio_context() again. This results in
> assert(!bs->walking_aio_notifiers).
> 
> The code in mirror is basically the same. The patch replaces boolean
> condition with incremental counter, which should solve the problem.
> 
> Signed-off-by: Denis V. Lunev 
> CC: Kevin Wolf 
> CC: Max Reitz 
> ---
>  include/block/block_int.h |  2 +-
>  block.c   | 12 ++--
>  2 files changed, 7 insertions(+), 7 deletions(-)

Changing this to a counter looks OK to me, but dealing with a recursive
bdrv_set_aio_context() might be a bit more complicated than that.  It
calls for trouble if one of the aio_notifiers assigns a different
AioContext than was just assigned, if nothing else then because we have
to make sure not to call any other notifier with the old "new" context.

In this case, that shouldn't be an issue because we will simply assign
the new context again, so it's actually a no-op.  Maybe we could allow
that case alone, but even then we have to verify it.

Another thing I'm wondering is how this prevents infinite recursion.
Won't we just call the notifier again which will try to again set its
BlockBackend's AioContext?

What is the full case anyway?  The AioContext of the source changes,
which results in the call of backup's notifier, which will then change
the AioContext of the target.  So source and target would need to be in
the same chain if setting target's context conflicts with a context
change on source.

Max

> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 29cafa4..a290711 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -613,7 +613,7 @@ struct BlockDriverState {
>   * BDS may register themselves in this list to be notified of changes
>   * regarding this BDS's context */
>  QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
> -bool walking_aio_notifiers; /* to make removal during iteration safe */
> +int walking_aio_notifiers; /* to make removal during iteration safe */
>  
>  char filename[PATH_MAX];
>  char backing_file[PATH_MAX]; /* if non zero, the image is a diff of
> diff --git a/block.c b/block.c
> index a8da4f2..82cc07b 100644
> --- a/block.c
> +++ b/block.c
> @@ -4739,8 +4739,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
>  return;
>  }
>  
> -assert(!bs->walking_aio_notifiers);
> -bs->walking_aio_notifiers = true;
> +bs->walking_aio_notifiers++;
>  QLIST_FOREACH_SAFE(baf, >aio_notifiers, list, baf_tmp) {
>  if (baf->deleted) {
>  bdrv_do_remove_aio_context_notifier(baf);
> @@ -4751,7 +4750,8 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
>  /* Never mind iterating again to check for ->deleted.  bdrv_close() will
>   * remove remaining aio notifiers if we aren't called again.
>   */
> -bs->walking_aio_notifiers = false;
> +bs->walking_aio_notifiers--;
> +assert(bs->walking_aio_notifiers >= 0);
>  
>  if (bs->drv->bdrv_detach_aio_context) {
>  bs->drv->bdrv_detach_aio_context(bs);
> @@ -4782,8 +4782,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
>  bs->drv->bdrv_attach_aio_context(bs, new_context);
>  }
>  
> -assert(!bs->walking_aio_notifiers);
> -bs->walking_aio_notifiers = true;
> +bs->walking_aio_notifiers++;
>  QLIST_FOREACH_SAFE(ban, >aio_notifiers, list, ban_tmp) {
>  if (ban->deleted) {
>  bdrv_do_remove_aio_context_notifier(ban);
> @@ -4791,7 +4790,8 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
>  ban->attached_aio_context(new_context, ban->opaque);
>  }
>  }
> -bs->walking_aio_notifiers = false;
> +

[Qemu-devel] [PATCH for-2.12 v2 0/2] i386/hyperv: fully control Hyper-V features in CPUID

2018-03-28 Thread Roman Kagan
In order to guarantee compatibility on migration, QEMU should have
complete control over the features it announces to the guest via CPUID.

However, a number of Hyper-V-related features happen to depend on the
support in the underlying KVM, with no regard to QEMU configuration.

Make QEMU regain control over what Hyper-V features it announces to the
guest.

Note #1: the patches are also being proposed[*] for stable-2.11, even
though one of them introduces a new cpu property.  This is done to
minimize the number of published QEMU releases where the behavior of the
features is unpredictable, with potentially fatal consequences for the
guest.

Note #2: there are other problems in the surrounding code, like ugly
error reporting or inconsistent population of MSRs.  I think this can be
put off to post-2.12.

[*] for the stable branch the second patch will have error returns
replaced with warnings; I'll post a separate series.

v1 -> v2:
 - indicate what flag requested the feature that can't be enabled in the
   error message
 - fix a typo in the error message for VP_RUNTIME

Roman Kagan (2):
  i386/hyperv: add hv-frequencies cpu property
  i386/hyperv: error out if features requested but unsupported

 target/i386/cpu.h |  1 +
 target/i386/cpu.c |  1 +
 target/i386/kvm.c | 45 +
 3 files changed, 39 insertions(+), 8 deletions(-)

-- 
2.14.3




[Qemu-devel] [PATCH for-2.12 v2 2/2] i386/hyperv: error out if features requested but unsupported

2018-03-28 Thread Roman Kagan
In order to guarantee compatibility on migration, QEMU should have
complete control over the features it announces to the guest via CPUID.

However, for a number of Hyper-V-related cpu properties, if the
corresponding feature is not supported by the underlying KVM, the
property is silently ignored and the feature is not announced to the
guest.

Refuse to start with an error instead.

Signed-off-by: Roman Kagan 
---
v1 -> v2:
 - indicate what flag requested the feature that can't be enabled in the
   error message
 - fix a typo in the error message for VP_RUNTIME

 target/i386/kvm.c | 32 
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index b35623ae24..113926aff2 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -659,17 +659,41 @@ static int hyperv_handle_properties(CPUState *cs)
 env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS;
 env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE;
 }
-if (cpu->hyperv_crash && has_msr_hv_crash) {
+if (cpu->hyperv_crash) {
+if (!has_msr_hv_crash) {
+fprintf(stderr, "Hyper-V crash MSRs "
+"(requested by 'hv-crash' cpu flag) "
+"are not supported by kernel\n");
+return -ENOSYS;
+}
 env->features[FEAT_HYPERV_EDX] |= HV_GUEST_CRASH_MSR_AVAILABLE;
 }
 env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE;
-if (cpu->hyperv_reset && has_msr_hv_reset) {
+if (cpu->hyperv_reset) {
+if (!has_msr_hv_reset) {
+fprintf(stderr, "Hyper-V reset MSR "
+"(requested by 'hv-reset' cpu flag) "
+"is not supported by kernel\n");
+return -ENOSYS;
+}
 env->features[FEAT_HYPERV_EAX] |= HV_RESET_AVAILABLE;
 }
-if (cpu->hyperv_vpindex && has_msr_hv_vpindex) {
+if (cpu->hyperv_vpindex) {
+if (!has_msr_hv_vpindex) {
+fprintf(stderr, "Hyper-V VP_INDEX MSR "
+"(requested by 'hv-vpindex' cpu flag) "
+"is not supported by kernel\n");
+return -ENOSYS;
+}
 env->features[FEAT_HYPERV_EAX] |= HV_VP_INDEX_AVAILABLE;
 }
-if (cpu->hyperv_runtime && has_msr_hv_runtime) {
+if (cpu->hyperv_runtime) {
+if (!has_msr_hv_runtime) {
+fprintf(stderr, "Hyper-V VP_RUNTIME MSR "
+"(requested by 'hv-runtime' cpu flag) "
+"is not supported by kernel\n");
+return -ENOSYS;
+}
 env->features[FEAT_HYPERV_EAX] |= HV_VP_RUNTIME_AVAILABLE;
 }
 if (cpu->hyperv_synic) {
-- 
2.14.3




Re: [Qemu-devel] [PATCH v4 1/2] tpm: extend TPM emulator with state migration support

2018-03-28 Thread Marc-André Lureau
Hi

On Thu, Mar 1, 2018 at 8:59 PM, Stefan Berger
 wrote:
> Extend the TPM emulator backend device with state migration support.
>
> The external TPM emulator 'swtpm' provides a protocol over
> its control channel to retrieve its state blobs. We implement
> functions for getting and setting the different state blobs.
> In case the setting of the state blobs fails, we return a
> negative errno code to fail the start of the VM.
>
> Since we have an external TPM emulator, we need to make sure
> that we do not migrate the state for as long as it is busy
> processing a request. We need to wait for notification that
> the request has completed processing.
>
> Signed-off-by: Stefan Berger 
> ---

With this squashed:
Reviewed-by: Marc-André Lureau 



diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c
index 6d6158deea..ec9c25989b 100644
--- a/hw/tpm/tpm_emulator.c
+++ b/hw/tpm/tpm_emulator.c
@@ -894,6 +894,7 @@ static void tpm_emulator_shutdown(TPMEmulator *tpm_emu)
 static void tpm_emulator_inst_finalize(Object *obj)
 {
 TPMEmulator *tpm_emu = TPM_EMULATOR(obj);
+TPMBlobBuffers *state_blobs = &tpm_emu->state_blobs;

 tpm_emulator_shutdown(tpm_emu);

@@ -908,6 +909,10 @@ static void tpm_emulator_inst_finalize(Object *obj)
 error_free(tpm_emu->migration_blocker);
 }

+tpm_sized_buffer_reset(&state_blobs->volatil);
+tpm_sized_buffer_reset(&state_blobs->permanent);
+tpm_sized_buffer_reset(&state_blobs->savestate);
+
 qemu_mutex_destroy(&tpm_emu->mutex);

 vmstate_unregister(NULL, &vmstate_tpm_emulator, obj);


>  hw/tpm/tpm_emulator.c | 312 
> --
>  1 file changed, 302 insertions(+), 10 deletions(-)
>
> diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c
> index b787aee..da877e5 100644
> --- a/hw/tpm/tpm_emulator.c
> +++ b/hw/tpm/tpm_emulator.c
> @@ -55,6 +55,19 @@
>  #define TPM_EMULATOR_IMPLEMENTS_ALL_CAPS(S, cap) (((S)->caps & (cap)) == 
> (cap))
>
>  /* data structures */
> +
> +/* blobs from the TPM; part of VM state when migrating */
> +typedef struct TPMBlobBuffers {
> +uint32_t permanent_flags;
> +TPMSizedBuffer permanent;
> +
> +uint32_t volatil_flags;
> +TPMSizedBuffer volatil;
> +
> +uint32_t savestate_flags;
> +TPMSizedBuffer savestate;
> +} TPMBlobBuffers;
> +
>  typedef struct TPMEmulator {
>  TPMBackend parent;
>
> @@ -70,6 +83,8 @@ typedef struct TPMEmulator {
>
>  unsigned int established_flag:1;
>  unsigned int established_flag_cached:1;
> +
> +TPMBlobBuffers state_blobs;
>  } TPMEmulator;
>
>
> @@ -301,7 +316,8 @@ static int tpm_emulator_set_buffer_size(TPMBackend *tb,
>  return 0;
>  }
>
> -static int tpm_emulator_startup_tpm(TPMBackend *tb, size_t buffersize)
> +static int _tpm_emulator_startup_tpm(TPMBackend *tb, size_t buffersize,
> + bool is_resume)
>  {
>  TPMEmulator *tpm_emu = TPM_EMULATOR(tb);
>  ptm_init init = {
> @@ -309,12 +325,17 @@ static int tpm_emulator_startup_tpm(TPMBackend *tb, 
> size_t buffersize)
>  };
>  ptm_res res;
>
> +DPRINTF("%s   is_resume: %d", __func__, is_resume);
> +
>  if (buffersize != 0 &&
>  tpm_emulator_set_buffer_size(tb, buffersize, NULL) < 0) {
>  goto err_exit;
>  }
>
> -DPRINTF("%s", __func__);
> +if (is_resume) {
> +init.u.req.init_flags = cpu_to_be32(PTM_INIT_FLAG_DELETE_VOLATILE);
> +}
> +
>  if (tpm_emulator_ctrlcmd(tpm_emu, CMD_INIT, &init, sizeof(init),
>   sizeof(init)) < 0) {
>  error_report("tpm-emulator: could not send INIT: %s",
> @@ -333,6 +354,11 @@ err_exit:
>  return -1;
>  }
>
> +static int tpm_emulator_startup_tpm(TPMBackend *tb, size_t buffersize)
> +{
> +return _tpm_emulator_startup_tpm(tb, buffersize, false);
> +}
> +
>  static bool tpm_emulator_get_tpm_established_flag(TPMBackend *tb)
>  {
>  TPMEmulator *tpm_emu = TPM_EMULATOR(tb);
> @@ -431,16 +457,21 @@ static size_t tpm_emulator_get_buffer_size(TPMBackend 
> *tb)
>  static int tpm_emulator_block_migration(TPMEmulator *tpm_emu)
>  {
>  Error *err = NULL;
> +ptm_cap caps = PTM_CAP_GET_STATEBLOB | PTM_CAP_SET_STATEBLOB |
> +   PTM_CAP_STOP;
>
> -error_setg(&tpm_emu->migration_blocker,
> -   "Migration disabled: TPM emulator not yet migratable");
> -migrate_add_blocker(tpm_emu->migration_blocker, &err);
> -if (err) {
> -error_report_err(err);
> -error_free(tpm_emu->migration_blocker);
> -tpm_emu->migration_blocker = NULL;
> +if (!TPM_EMULATOR_IMPLEMENTS_ALL_CAPS(tpm_emu, caps)) {
> +error_setg(&tpm_emu->migration_blocker,
> +   "Migration disabled: TPM emulator does not support "
> +   "migration");
> +migrate_add_blocker(tpm_emu->migration_blocker, &err);
> +if (err) {
> +error_report_err(err);
> +

Re: [Qemu-devel] [PATCH v4 5/9] qapi: introduce new cmd option "allowed-in-preconfig"

2018-03-28 Thread Igor Mammedov
On Fri, 23 Mar 2018 16:11:53 -0500
Eric Blake  wrote:

> On 03/12/2018 08:11 AM, Igor Mammedov wrote:
[...]
> 
> > preconfig state but allowed in all other states like they used
> > to be.
> > 
> > Within this patch allow following commands in preconfig state:
> > qmp_capabilities
> > query-qmp-schema
> > query-commands
> > query-status
> > cont
> > to allow qmp connection, basic introspection and moving to the next
> > state.  
> 
> Looks like a reasonable list.  Maybe also query-command-line-options 
> should be here?
added
 
> > 
> > PS:
> > set-numa-node and query-hotpluggable-cpus will be enabled later in
> > a separate patch.
> > 
> > Signed-off-by: Igor Mammedov 
> > ---
> > v4:
> >* replaces complex "universal" approach
> >   "[PATCH v3 5/9] QAPI: allow to specify valid runstates  per command"
> >  with a simpler new command flag "allowed-in-preconfig".
> >  (Eric Blake )  
> 
> Thanks; it looks a lot more maintainable now.  However, you need to 
> rebase, now that 'allow-oob' has already landed.
rebased

[...]

All other comments are addressed as well



[Qemu-devel] [Bug 1539940] Re: Qemu 2.5 Solaris 8 and 9 sparc hang after terminal type menu

2018-03-28 Thread Zhen Ning Lim
** Changed in: qemu
   Status: New => Fix Released

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1539940

Title:
  Qemu 2.5 Solaris 8 and 9 sparc hang after terminal type menu

Status in QEMU:
  Fix Released

Bug description:
  Qemu command:
  qemu-system-sparc -nographic -monitor null -serial 
mon:telnet:localhost:3000,server -bios ../../Downloads/ss20_v2.25_rom -M SS-20 
-hda ./solsparc -m 512 -cdrom ./sol-9-905hw-ga-sparc-dvd.iso -boot d -cpu "TI 
SuperSparc 60" -net nic,vlan=1,macaddr=52:54:0:12:34:56

  
  when i do disk2:d, the system loads until the terminal type menu.

  What type of terminal are you using?
  1) ANSI Standard CRT
  2) DEC VT52
  3) DEC VT100
  4) Heathkit 19
  5) Lear Siegler ADM31
  6) PC Console
  7) Sun Command Tool
  8) Sun Workstation
  9) Televideo 910
  10) Televideo 925
  11) Wyse Model 50
  12) X Terminal Emulator (xterms)
  13) CDE Terminal Emulator (dtterm)
  14) Other
  Type the number of your choice and press Return: 3
  syslog service starting.
  savecore: no dump device configured
  Running in command line mode

  And nothing happens after that. Anyone encountered this issue?

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1539940/+subscriptions



[Qemu-devel] [Bug 1539940] Re: Qemu 2.5 Solaris 8 and 9 sparc hang after terminal type menu

2018-03-28 Thread m...@papersolve.com
This is no longer a problem (for sure in latest git, probably further
back than that, as I installed Solaris 9/SPARC on SS-20 a few months
ago):

Type the number of your choice and press Return: 3
syslog service starting.
savecore: no dump device configured
Running in command line mode

Please wait while the system information is loaded... /



Welcome to the Web Start Solaris Command Line installation!

The following questions will gather information about this system.
This information will be used to configure:

Network
Kerberos Security
Name Service
Date and Time
Root Password
Power Management

   


This can be resolved.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1539940

Title:
  Qemu 2.5 Solaris 8 and 9 sparc hang after terminal type menu

Status in QEMU:
  New

Bug description:
  Qemu command:
  qemu-system-sparc -nographic -monitor null -serial 
mon:telnet:localhost:3000,server -bios ../../Downloads/ss20_v2.25_rom -M SS-20 
-hda ./solsparc -m 512 -cdrom ./sol-9-905hw-ga-sparc-dvd.iso -boot d -cpu "TI 
SuperSparc 60" -net nic,vlan=1,macaddr=52:54:0:12:34:56

  
  when i do disk2:d, the system loads until the terminal type menu.

  What type of terminal are you using?
  1) ANSI Standard CRT
  2) DEC VT52
  3) DEC VT100
  4) Heathkit 19
  5) Lear Siegler ADM31
  6) PC Console
  7) Sun Command Tool
  8) Sun Workstation
  9) Televideo 910
  10) Televideo 925
  11) Wyse Model 50
  12) X Terminal Emulator (xterms)
  13) CDE Terminal Emulator (dtterm)
  14) Other
  Type the number of your choice and press Return: 3
  syslog service starting.
  savecore: no dump device configured
  Running in command line mode

  And nothing happens after that. Anyone encountered this issue?

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1539940/+subscriptions



[Qemu-devel] [Bug 1588591] Re: Qemu 2.6 Solaris 8 Sparc telnet terminate itself

2018-03-28 Thread m...@papersolve.com
Although I have occasionally seen this message with later versions of
QEMU running Solaris 8/SPARC it has never affected any operations for me
or terminated a telnet or QEMU process, so I think if it is still there
it's not having any effect.  So I think this can be closed.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1588591

Title:
  Qemu 2.6 Solaris 8 Sparc telnet terminate itself

Status in QEMU:
  New

Bug description:
  With Qemu 2.6, Solaris 8 can be installed and run. However, it
  sometimes terminate itself with I/O thread spun for 1000 iterations.

  qemu-system-sparc -nographic -monitor null -serial 
mon:telnet:0.0.0.0:3000,server -hda ./Sparc8.disk -m 256 -boot c -net 
nic,macaddr=52:54:0:12:34:56 -net tap,ifname=tap0,script=no,downscript=noQEMU 
waiting for connection on: disconnected:telnet:0.0.0.0:3000,server
  main-loop: WARNING: I/O thread spun for 1000 iterations

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1588591/+subscriptions



Re: [Qemu-devel] [PATCH v4 for 2.12 0/3] fix bitmaps migration through shared storage

2018-03-28 Thread Max Reitz
On 2018-03-27 12:11, Vladimir Sementsov-Ogievskiy wrote:
> 27.03.2018 12:53, Vladimir Sementsov-Ogievskiy wrote:
>> 27.03.2018 12:28, Vladimir Sementsov-Ogievskiy wrote:
>>> 26.03.2018 21:06, Max Reitz wrote:
 On 2018-03-20 18:05, Vladimir Sementsov-Ogievskiy wrote:
> Hi all.
>
> This fixes bitmaps migration through shared storage. Look at 02 for
> details.
>
> The bug introduced in 2.10 with the whole qcow2 bitmaps feature, so
> qemu-stable in CC. However I doubt that someone really suffered
> from this.
>
> Do we need dirty bitmaps at all in inactive case? - that was a
> question in v2.
> And, keeping in mind that we are going to use inactive mode not
> only for
> incoming migration, I'm not sure that answer is NO (but, it may be
> "NO" for
> 2.10, 2.11), so let's fix it in proposed here manner at least for
> 2.12.
 For some reason, I can't get 169 to work now at all[1]. What's more,
 whenever I run it, two (on current master, maybe more after this
 series)
 "cat $TEST_DIR/mig_file" processes stay around.  That doesn't seem
 right.

 However, this series doesn't seem to make it worse[2]...  So I'm
 keeping
 it.  I suppose it's just some issue with the test.

 Max


 [1] Sometimes there are migration event timeouts, sometimes just VM
 launch timeouts (specifically when VM B is supposed to be re-launched
 just after it has been shut down), and sometimes I get a dirty bitmap
 hash mismatch.


 [2] The whole timeline was:

 - Apply this series, everything seems alright

 (a couple of hours later)
 - Test some other things, stumble over 169 once or so

 - Focus on 169, fails a bit more often

 (today)
 - Can't get it to work at all

 - Can't get it to work in any version, neither before nor after this
 patch

 - Lose my sanity

 - Write this email

 O:-)

>>>
>>> hmm.. checked on current master (7b93d78a04aa24), tried a lot of
>>> times in a loop, works for me. How can I help?
>>>
>>
>> O, loop finally finished, with:
>>
>> 169 6s ... [failed, exit status 1] - output mismatch (see 169.out.bad)
>> --- /work/src/qemu/master/tests/qemu-iotests/169.out    2018-03-16
>> 21:01:19.536765587 +0300
>> +++ /work/src/qemu/master/tests/qemu-iotests/169.out.bad 2018-03-27
>> 12:33:03.804800350 +0300
>> @@ -1,5 +1,20 @@
>> -
>> +..E.
>> +==
>> +ERROR: test__persistent__not_migbitmap__offline
>> (__main__.TestDirtyBitmapMigration)
>> +methodcaller(name, ...) --> methodcaller object
>> +--
>> +Traceback (most recent call last):
>> +  File "169", line 129, in do_test_migration
>> +    self.vm_b.event_wait("RESUME", timeout=10.0)
>> +  File
>> "/work/src/qemu/master/tests/qemu-iotests/../../scripts/qemu.py", line
>> 349, in event_wait
>> +    event = self._qmp.pull_event(wait=timeout)
>> +  File
>> "/work/src/qemu/master/tests/qemu-iotests/../../scripts/qmp/qmp.py",
>> line 216, in pull_event
>> +    self.__get_events(wait)
>> +  File
>> "/work/src/qemu/master/tests/qemu-iotests/../../scripts/qmp/qmp.py",
>> line 124, in __get_events
>> +    raise QMPTimeoutError("Timeout waiting for event")
>> +QMPTimeoutError: Timeout waiting for event
>> +
>>  --
>>  Ran 8 tests
>>
>> -OK
>> +FAILED (errors=1)
>> Failures: 169
>> Failed 1 of 1 tests
>>
>>
>> and I have a lot of opened pipes, like:
>>
>> root   18685  0.0  0.0 107924   352 pts/0    S    12:19   0:00 cat
>> /work/src/qemu/master/tests/qemu-iotests/scratch/mig_file
>>
>> ...
>>
>> restart testing loop, it continues to pass 169 again and again...
>>
> 
>  and,
> 
> --- /work/src/qemu/master/tests/qemu-iotests/169.out    2018-03-16
> 21:01:19.536765587 +0300
> +++ /work/src/qemu/master/tests/qemu-iotests/169.out.bad 2018-03-27
> 12:58:44.804894014 +0300
> @@ -1,5 +1,20 @@
> -
> +F...
> +==
> +FAIL: test__not_persistent__migbitmap__offline
> (__main__.TestDirtyBitmapMigration)
> +methodcaller(name, ...) --> methodcaller object
> +--
> +Traceback (most recent call last):
> +  File "169", line 136, in do_test_migration
> +    self.check_bitmap(self.vm_b, sha256 if persistent else False)
> +  File "169", line 77, in check_bitmap
> +    "Dirty bitmap 'bitmap0' not found");
> +  File "/work/src/qemu/master/tests/qemu-iotests/iotests.py", line 422,
> in assert_qmp
> +    result = self.dictpath(d, path)
> +  File "/work/src/qemu/master/tests/qemu-iotests/iotests.py", line 381,
> in dictpath
> +    self.fail('failed path traversal for "%s" in "%s"' % (path, str(d)))
> +AssertionError: failed 

  1   2   >