date:20170113

[Qemu-devel] [PULL 0/4] tcg fixes

2017-01-13 Thread Richard Henderson

Two problems found with my most recent tcg-2.9 queued patches;
two patches for tcg/aarch64 that had been submitted but somehow
dropped off the patch queue.

With this, aarch64 risu passes on aarch64 again.


r~


The following changes since commit b6af8ea60282df514f87d32e36afd1c9aeee28c8:

  Merge remote-tracking branch 
'remotes/ehabkost/tags/x86-and-machine-pull-request' into staging (2017-01-13 
14:38:21 +)

are available in the git repository at:

  git://github.com/rth7680/qemu.git tags/pull-tcg-20170113

for you to fetch changes up to 8cf9a3d3f7a4b95f33e0bda5416b9c93ec887dd3:

  tcg/aarch64: Fix tcg_out_movi (2017-01-13 11:47:29 -0800)


Fixes and more queued patches


Richard Henderson (4):
  tcg/s390: Fix merge error with facilities
  target/arm: Fix ubfx et al for aarch64
  tcg/aarch64: Fix addsub2 for 0+C
  tcg/aarch64: Fix tcg_out_movi

 target/arm/translate-a64.c   |  2 +-
 tcg/aarch64/tcg-target.inc.c | 66 ++--
 tcg/s390/tcg-target.inc.c|  2 +-
 3 files changed, 35 insertions(+), 35 deletions(-)

[Qemu-devel] [PULL 2/4] target/arm: Fix ubfx et al for aarch64

2017-01-13 Thread Richard Henderson

The patch in 59a71b4c5b4e suffered from a merge failure
when compared to the original patch in

  http://lists.nongnu.org/archive/html/qemu-devel/2016-12/msg00137.html

Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 4f09dfb..d0352e2 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3217,7 +3217,7 @@ static void disas_bitfield(DisasContext *s, uint32_t insn)
 tcg_tmp = read_cpu_reg(s, rn, 1);
 
 /* Recognize simple(r) extractions.  */
-if (si <= ri) {
+if (si >= ri) {
 /* Wd = Wn */
 len = (si - ri) + 1;
 if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
-- 
2.9.3

Re: [Qemu-devel] [PATCH v6 3/7] trace: [tcg] Delay changes to dynamic state when translating

2017-01-13 Thread Lluís Vilanova

Paolo Bonzini writes:

> On 12/01/2017 20:37, Lluís Vilanova wrote:
>> Stefan Hajnoczi writes:
>> 
>>> On Tue, Jan 10, 2017 at 05:31:37PM +0100, Paolo Bonzini wrote:
 On 09/01/2017 18:01, Stefan Hajnoczi wrote:
> Or use a simpler scheme:
> 
> struct CPUState {
> ...
> uint32_t dstate_update_count;
> };
> 
> In trace_event_set_vcpu_state_dynamic():
> 
> if (state) {
> trace_events_enabled_count++;
> set_bit(vcpu_id, vcpu->trace_dstate_delayed);
> atomic_inc(&vcpu->dstate_update_count, 1);
> (*ev->dstate)++;
> } ...
> 
> In cpu_exec() and friends:
> 
> last_dstate_update_count = atomic_read(&vcpu->dstate_update_count);
> 
> tb = tb_find(cpu, last_tb, tb_exit);
> cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
> 
> /* apply and disable delayed dstate changes */
> if (unlikely(atomic_read(&cpu->dstate_update_count) != 
> last_dstate_update_count)) {
> bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
> trace_get_vcpu_event_count());
> }
> 
> (You'll need to adjust the details but the update counter approach
> should be workable.)
 
 Would it work to use async_run_on_cpu?
>> 
>>> I think so.
>> 
>> AFAIU we cannot use async_run_on_cpu(), since we need to reset the local
>> variable "last_tb" to avoid chaining TBs with different dstates, and we 
>> cannot
>> use cpu_loop_exit() inside the callback.

> async_run_on_cpu would run as soon as the currently executing TB
> finishes, and would leave cpu_exec completely, so there would be no
> chaining.

Aha, I've re-read the internals used by async and that'll be sufficient.


Thanks,
  Lluis

Re: [Qemu-devel] [PATCH v3 5/5] target-m68k: increment/decrement with SP

2017-01-13 Thread Richard Henderson

On 01/13/2017 10:36 AM, Laurent Vivier wrote:
> On 680x0 family only.
> 
> Address Register indirect With postincrement:
> 
> When using the stack pointer (A7) with byte size data, the register
> is incremented by two.
> 
> Address Register indirect With predecrement:
> 
> When using the stack pointer (A7) with byte size data, the register
> is decremented by two.
> 
> Signed-off-by: Laurent Vivier 
> Reviewed-by: Thomas Huth 
> ---
>  target/m68k/translate.c | 14 --
>  1 file changed, 12 insertions(+), 2 deletions(-)

Reviewed-by: Richard Henderson 


r~

[Qemu-devel] [PATCH v7 1/7] exec: [tcg] Refactor flush of per-CPU virtual TB cache

2017-01-13 Thread Lluís Vilanova

The function is reused in later patches.

Signed-off-by: Lluís Vilanova 
Reviewed-by: Richard Henderson 
---
 cputlb.c|2 +-
 include/exec/exec-all.h |6 ++
 translate-all.c |   14 +-
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index 813279f3bc..9bf9960e1b 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -80,7 +80,7 @@ void tlb_flush(CPUState *cpu, int flush_global)
 
 memset(env->tlb_table, -1, sizeof(env->tlb_table));
 memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
-memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
+tb_flush_jmp_cache_all(cpu);
 
 env->vtlb_index = 0;
 env->tlb_flush_addr = -1;
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index a8c13cee66..57cd978578 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -256,6 +256,12 @@ struct TranslationBlock {
 };
 
 void tb_free(TranslationBlock *tb);
+/**
+ * tb_flush_jmp_cache_all:
+ *
+ * Flush the virtual translation block cache.
+ */
+void tb_flush_jmp_cache_all(CPUState *env);
 void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 
diff --git a/translate-all.c b/translate-all.c
index 3dd9214904..29ccb9e546 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -941,11 +941,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data 
tb_flush_count)
 }
 
 CPU_FOREACH(cpu) {
-int i;
-
-for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) {
-atomic_set(&cpu->tb_jmp_cache[i], NULL);
-}
+tb_flush_jmp_cache_all(cpu);
 }
 
 tcg_ctx.tb_ctx.nb_tbs = 0;
@@ -1741,6 +1737,14 @@ void tb_check_watchpoint(CPUState *cpu)
 }
 }
 
+void tb_flush_jmp_cache_all(CPUState *cpu)
+{
+int i;
+for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) {
+atomic_set(&cpu->tb_jmp_cache[i], NULL);
+}
+}
+
 #ifndef CONFIG_USER_ONLY
 /* in deterministic execution mode, instructions doing device I/Os
must be at the end of the TB */

[Qemu-devel] [PATCH v7 0/7] trace: [tcg] Optimize per-vCPU tracing states with separate TB caches

2017-01-13 Thread Lluís Vilanova

Optimizes tracing of events with the 'tcg' and 'vcpu' properties (e.g., memory
accesses), making it feasible to statically enable them by default on all QEMU
builds.

Some quick'n'dirty numbers with 400.perlbench (SPECcpu2006) on the train input
(medium size - suns.pl) and the guest_mem_before event:

* vanilla, statically disabled
real0m2,259s
user0m2,252s
sys 0m0,004s

* vanilla, statically enabled (overhead: 2.18x)
real0m4,921s
user0m4,912s
sys 0m0,008s

* multi-tb, statically disabled (overhead: 0.99x) [within noise range]
real0m2,228s
user0m2,216s
sys 0m0,008s

* multi-tb, statically enabled (overhead: 0.99x) [within noise range]
real0m2,229s
user0m2,224s
sys 0m0,004s


Right now, events with the 'tcg' property always generate TCG code to trace that
event at guest code execution time, where the event's dynamic state is checked.

This series adds a performance optimization where TCG code for events with the
'tcg' and 'vcpu' properties is not generated if the event is dynamically
disabled. This optimization raises two issues:

* An event can be dynamically disabled/enabled after the corresponding TCG code
  has been generated (i.e., a new TB with the corresponding code should be
  used).

* Each vCPU can have a different dynamic state for the same event (i.e., tracing
  the memory accesses of only one process pinned to a vCPU).

To handle both issues, this series integrates the dynamic tracing event state
into the TB hashing function, so that vCPUs tracing different events will use
separate TBs. Note that only events with the 'vcpu' property are used for
hashing (as stored in the bitmap of CPUState->trace_dstate).

This makes dynamic event state changes on vCPUs very efficient, since they can
use TBs produced by other vCPUs while on the same event state combination (or
produced by the same vCPU, earlier).

Discarded alternatives:

* Emitting TCG code to check if an event needs tracing, where we should still
  move the tracing call code to either a cold path (making tracing performance
  worse), or leave it inlined (making non-tracing performance worse).

* Eliding TCG code only when *zero* vCPUs are tracing an event, since enabling
  it on a single vCPU will impact the performance of all other vCPUs that are
  not tracing that event.

Signed-off-by: Lluís Vilanova 
---

Changes in v7
=

* Fix delayed dstate changes (now uses async_run_on_cpu() as suggested by Paolo
  Bonzini).

* Note to Richard: patch 4 has been adapted to the new patch 3 async callback,
  but is essentially the same.


Changes in v6
=

* Check hashing size error with QEMU_BUILD_BUG_ON [Richard Henderson].


Changes in v5
=

* Move define into "qemu-common.h" to allow compilation of tests.


Changes in v4
=

* Incorporate trace_dstate into the TB hashing function instead of using
  multiple physical TB caches [suggested by Richard Henderson].


Changes in v3
=

* Rebase on 0737f32daf.
* Do not use reserved symbol prefixes ("__") [Stefan Hajnoczi].
* Refactor trace_get_vcpu_event_count() to be inlinable.
* Optimize cpu_tb_cache_set_requested() (hottest path).


Changes in v2
=

* Fix bitmap copy in cpu_tb_cache_set_apply().
* Split generated code re-alignment into a separate patch [Daniel P. Berrange].


Lluís Vilanova (7):
  exec: [tcg] Refactor flush of per-CPU virtual TB cache
  trace: Make trace_get_vcpu_event_count() inlinable
  trace: [tcg] Delay changes to dynamic state when translating
  exec: [tcg] Use different TBs according to the vCPU's dynamic tracing 
state
  trace: [tcg] Do not generate TCG code to trace dinamically-disabled events
  trace: [tcg,trivial] Re-align generated code
  trace: [trivial] Statically enable all guest events


 cpu-exec.c   |   22 +-
 cputlb.c |2 +-
 include/exec/exec-all.h  |   11 +++
 include/exec/tb-hash-xx.h|8 +++-
 include/exec/tb-hash.h   |5 +++--
 include/qemu-common.h|3 +++
 include/qom/cpu.h|3 +++
 qom/cpu.c|2 ++
 scripts/tracetool/__init__.py|3 ++-
 scripts/tracetool/backend/dtrace.py  |4 ++--
 scripts/tracetool/backend/ftrace.py  |   20 ++--
 scripts/tracetool/backend/log.py |   19 ++-
 scripts/tracetool/backend/simple.py  |4 ++--
 scripts/tracetool/backend/syslog.py  |6 +++---
 scripts/tracetool/backend/ust.py |4 ++--
 scripts/tracetool/format/h.py|   26 +++---
 scripts/tracetool/format/tcg_h.py|   21 +
 scripts/tracetool/format/tcg_helper_c.py |5 +++--
 tests/qht-bench.c|2 +-
 trace-events |

[Qemu-devel] [PATCH v7 5/7] trace: [tcg] Do not generate TCG code to trace dinamically-disabled events

2017-01-13 Thread Lluís Vilanova

If an event is dynamically disabled, the TCG code that calls the
execution-time tracer is not generated.

Removes the overheads of execution-time tracers for dynamically disabled
events. As a bonus, also avoids checking the event state when the
execution-time tracer is called from TCG-generated code (since otherwise
TCG would simply not call it).

Signed-off-by: Lluís Vilanova 
---
 scripts/tracetool/__init__.py|3 ++-
 scripts/tracetool/format/h.py|   26 +++---
 scripts/tracetool/format/tcg_h.py|   21 +
 scripts/tracetool/format/tcg_helper_c.py |5 +++--
 4 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py
index 365446fa53..5f1576539e 100644
--- a/scripts/tracetool/__init__.py
+++ b/scripts/tracetool/__init__.py
@@ -6,7 +6,7 @@ Machinery for generating tracing-related intermediate files.
 """
 
 __author__ = "Lluís Vilanova "
-__copyright__  = "Copyright 2012-2016, Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
 __license__= "GPL version 2 or (at your option) any later version"
 
 __maintainer__ = "Stefan Hajnoczi"
@@ -264,6 +264,7 @@ class Event(object):
 return self._FMT.findall(self.fmt)
 
 QEMU_TRACE   = "trace_%(name)s"
+QEMU_TRACE_NOCHECK   = "_nocheck__" + QEMU_TRACE
 QEMU_TRACE_TCG   = QEMU_TRACE + "_tcg"
 QEMU_DSTATE  = "_TRACE_%(NAME)s_DSTATE"
 QEMU_EVENT   = "_TRACE_%(NAME)s_EVENT"
diff --git a/scripts/tracetool/format/h.py b/scripts/tracetool/format/h.py
index 3682f4e6a8..aecf249d66 100644
--- a/scripts/tracetool/format/h.py
+++ b/scripts/tracetool/format/h.py
@@ -6,7 +6,7 @@ trace/generated-tracers.h
 """
 
 __author__ = "Lluís Vilanova "
-__copyright__  = "Copyright 2012-2016, Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
 __license__= "GPL version 2 or (at your option) any later version"
 
 __maintainer__ = "Stefan Hajnoczi"
@@ -49,6 +49,19 @@ def generate(events, backend, group):
 backend.generate_begin(events, group)
 
 for e in events:
+# tracer without checks
+out('',
+'static inline void %(api)s(%(args)s)',
+'{',
+api=e.api(e.QEMU_TRACE_NOCHECK),
+args=e.args)
+
+if "disable" not in e.properties:
+backend.generate(e, group)
+
+out('}')
+
+# tracer wrapper with checks (per-vCPU tracing)
 if "vcpu" in e.properties:
 trace_cpu = next(iter(e.args))[1]
 cond = "trace_event_get_vcpu_state(%(cpu)s,"\
@@ -63,16 +76,15 @@ def generate(events, backend, group):
 'static inline void %(api)s(%(args)s)',
 '{',
 'if (%(cond)s) {',
+'%(api_nocheck)s(%(names)s);',
+'}',
+'}',
 api=e.api(),
+api_nocheck=e.api(e.QEMU_TRACE_NOCHECK),
 args=e.args,
+names=", ".join(e.args.names()),
 cond=cond)
 
-if "disable" not in e.properties:
-backend.generate(e, group)
-
-out('}',
-'}')
-
 backend.generate_end(events, group)
 
 out('#endif /* TRACE_%s_GENERATED_TRACERS_H */' % group.upper())
diff --git a/scripts/tracetool/format/tcg_h.py 
b/scripts/tracetool/format/tcg_h.py
index 5f213f6cba..9a73b5b268 100644
--- a/scripts/tracetool/format/tcg_h.py
+++ b/scripts/tracetool/format/tcg_h.py
@@ -6,7 +6,7 @@ Generate .h file for TCG code generation.
 """
 
 __author__ = "Lluís Vilanova "
-__copyright__  = "Copyright 2012-2016, Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
 __license__= "GPL version 2 or (at your option) any later version"
 
 __maintainer__ = "Stefan Hajnoczi"
@@ -41,7 +41,7 @@ def generate(events, backend, group):
 
 for e in events:
 # just keep one of them
-if "tcg-trans" not in e.properties:
+if "tcg-exec" not in e.properties:
 continue
 
 out('static inline void %(name_tcg)s(%(args)s)',
@@ -53,12 +53,25 @@ def generate(events, backend, group):
 args_trans = e.original.event_trans.args
 args_exec = tracetool.vcpu.transform_args(
 "tcg_helper_c", e.original.event_exec, "wrapper")
+if "vcpu" in e.properties:
+trace_cpu = e.args.names()[0]
+cond = "trace_event_get_vcpu_state(%(cpu)s,"\
+   " TRACE_%(id)s)"\
+   % dict(
+   cpu=trace_cpu,
+   id=e.original.event_exec.name.upper())
+else:
+cond = "true"
+
 out('%(name_trans)s(%(argnames_trans)s);',
-'gen_helper_%(name_exec)s(%(argnames_exec)s);',
+'if (%(cond)s) {',
+'gen_h

[Qemu-devel] [PATCH v7 2/7] trace: Make trace_get_vcpu_event_count() inlinable

2017-01-13 Thread Lluís Vilanova

Later patches will make use of it.

Signed-off-by: Lluís Vilanova 
---
 trace/control-internal.h |7 ++-
 trace/control.c  |   11 +++
 trace/control.h  |4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/trace/control-internal.h b/trace/control-internal.h
index a9d395a587..a8beb44847 100644
--- a/trace/control-internal.h
+++ b/trace/control-internal.h
@@ -1,7 +1,7 @@
 /*
  * Interface for configuring and controlling the state of tracing events.
  *
- * Copyright (C) 2011-2016 Lluís Vilanova 
+ * Copyright (C) 2011-2017 Lluís Vilanova 
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -16,6 +16,7 @@
 
 
 extern int trace_events_enabled_count;
+extern uint32_t trace_next_vcpu_id;
 
 
 static inline bool trace_event_is_pattern(const char *str)
@@ -82,6 +83,10 @@ static inline bool 
trace_event_get_vcpu_state_dynamic(CPUState *vcpu,
 return trace_event_get_vcpu_state_dynamic_by_vcpu_id(vcpu, vcpu_id);
 }
 
+static inline uint32_t trace_get_vcpu_event_count(void)
+{
+return trace_next_vcpu_id;
+}
 
 void trace_event_register_group(TraceEvent **events);
 
diff --git a/trace/control.c b/trace/control.c
index 1a7bee6ddc..8a52c7fc39 100644
--- a/trace/control.c
+++ b/trace/control.c
@@ -1,7 +1,7 @@
 /*
  * Interface for configuring and controlling the state of tracing events.
  *
- * Copyright (C) 2011-2016 Lluís Vilanova 
+ * Copyright (C) 2011-2017 Lluís Vilanova 
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -36,7 +36,7 @@ typedef struct TraceEventGroup {
 static TraceEventGroup *event_groups;
 static size_t nevent_groups;
 static uint32_t next_id;
-static uint32_t next_vcpu_id;
+uint32_t trace_next_vcpu_id;
 
 QemuOptsList qemu_trace_opts = {
 .name = "trace",
@@ -65,7 +65,7 @@ void trace_event_register_group(TraceEvent **events)
 for (i = 0; events[i] != NULL; i++) {
 events[i]->id = next_id++;
 if (events[i]->vcpu_id != TRACE_VCPU_EVENT_NONE) {
-events[i]->vcpu_id = next_vcpu_id++;
+events[i]->vcpu_id = trace_next_vcpu_id++;
 }
 }
 event_groups = g_renew(TraceEventGroup, event_groups, nevent_groups + 1);
@@ -299,8 +299,3 @@ char *trace_opt_parse(const char *optarg)
 
 return trace_file;
 }
-
-uint32_t trace_get_vcpu_event_count(void)
-{
-return next_vcpu_id;
-}
diff --git a/trace/control.h b/trace/control.h
index ccaeac8552..6dfbc53c8d 100644
--- a/trace/control.h
+++ b/trace/control.h
@@ -1,7 +1,7 @@
 /*
  * Interface for configuring and controlling the state of tracing events.
  *
- * Copyright (C) 2011-2016 Lluís Vilanova 
+ * Copyright (C) 2011-2017 Lluís Vilanova 
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -237,7 +237,7 @@ char *trace_opt_parse(const char *optarg);
  *
  * Return the number of known vcpu-specific events
  */
-uint32_t trace_get_vcpu_event_count(void);
+static uint32_t trace_get_vcpu_event_count(void);
 
 
 #include "trace/control-internal.h"

[Qemu-devel] [PATCH v7 4/7] exec: [tcg] Use different TBs according to the vCPU's dynamic tracing state

2017-01-13 Thread Lluís Vilanova

Every vCPU now uses a separate set of TBs for each set of dynamic
tracing event state values. Each set of TBs can be used by any number of
vCPUs to maximize TB reuse when vCPUs have the same tracing state.

This feature is later used by tracetool to optimize tracing of guest
code events.

The maximum number of TB sets is defined as 2^E, where E is the number
of events that have the 'vcpu' property (their state is stored in
CPUState->trace_dstate).

For this to work, a change on the dynamic tracing state of a vCPU will
force it to flush its virtual TB cache (which is only indexed by
address), and fall back to the physical TB cache (which now contains the
vCPU's dynamic tracing state as part of the hashing function).

Signed-off-by: Lluís Vilanova 
Reviewed-by: Richard Henderson 
---
 cpu-exec.c|   22 +-
 include/exec/exec-all.h   |5 +
 include/exec/tb-hash-xx.h |8 +++-
 include/exec/tb-hash.h|5 +++--
 include/qemu-common.h |3 +++
 tests/qht-bench.c |2 +-
 trace/control-target.c|1 +
 trace/control.h   |3 +++
 translate-all.c   |   16 ++--
 9 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/cpu-exec.c b/cpu-exec.c
index 4188fed3c6..36709cba1f 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -261,6 +261,7 @@ struct tb_desc {
 CPUArchState *env;
 tb_page_addr_t phys_page1;
 uint32_t flags;
+TRACE_QHT_VCPU_DSTATE_TYPE trace_vcpu_dstate;
 };
 
 static bool tb_cmp(const void *p, const void *d)
@@ -272,6 +273,7 @@ static bool tb_cmp(const void *p, const void *d)
 tb->page_addr[0] == desc->phys_page1 &&
 tb->cs_base == desc->cs_base &&
 tb->flags == desc->flags &&
+tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
 !atomic_read(&tb->invalid)) {
 /* check next page if needed */
 if (tb->page_addr[1] == -1) {
@@ -293,7 +295,8 @@ static bool tb_cmp(const void *p, const void *d)
 static TranslationBlock *tb_htable_lookup(CPUState *cpu,
   target_ulong pc,
   target_ulong cs_base,
-  uint32_t flags)
+  uint32_t flags,
+  uint32_t trace_vcpu_dstate)
 {
 tb_page_addr_t phys_pc;
 struct tb_desc desc;
@@ -302,10 +305,11 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu,
 desc.env = (CPUArchState *)cpu->env_ptr;
 desc.cs_base = cs_base;
 desc.flags = flags;
+desc.trace_vcpu_dstate = trace_vcpu_dstate;
 desc.pc = pc;
 phys_pc = get_page_addr_code(desc.env, pc);
 desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
-h = tb_hash_func(phys_pc, pc, flags);
+h = tb_hash_func(phys_pc, pc, flags, trace_vcpu_dstate);
 return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
 }
 
@@ -317,16 +321,24 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
 TranslationBlock *tb;
 target_ulong cs_base, pc;
 uint32_t flags;
+unsigned long trace_vcpu_dstate_bitmap;
+TRACE_QHT_VCPU_DSTATE_TYPE trace_vcpu_dstate;
 bool have_tb_lock = false;
 
+bitmap_copy(&trace_vcpu_dstate_bitmap, cpu->trace_dstate,
+trace_get_vcpu_event_count());
+memcpy(&trace_vcpu_dstate, &trace_vcpu_dstate_bitmap,
+   sizeof(trace_vcpu_dstate));
+
 /* we record a subset of the CPU state. It will
always be the same before a given translated block
is executed. */
 cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
 tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
 if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
- tb->flags != flags)) {
-tb = tb_htable_lookup(cpu, pc, cs_base, flags);
+ tb->flags != flags ||
+ tb->trace_vcpu_dstate != trace_vcpu_dstate)) {
+tb = tb_htable_lookup(cpu, pc, cs_base, flags, trace_vcpu_dstate);
 if (!tb) {
 
 /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
@@ -340,7 +352,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
 /* There's a chance that our desired tb has been translated while
  * taking the locks so we check again inside the lock.
  */
-tb = tb_htable_lookup(cpu, pc, cs_base, flags);
+tb = tb_htable_lookup(cpu, pc, cs_base, flags, trace_vcpu_dstate);
 if (!tb) {
 /* if no translated code available, then translate it now */
 tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 57cd978578..ae74f61ea2 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -200,6 +200,10 @@ static inline void tlb_flush_by_mmuidx(CPUState *cpu, ...)
 #define USE_DIRECT_JUMP
 #endif
 
+/**
+ * TranslationBl

[Qemu-devel] [PATCH v6 2/9] block: Change bdrv_get_encrypted_filename()

2017-01-13 Thread Max Reitz

Instead of returning a pointer to the filename, g_strdup() it. This will
become necessary once we do not have BlockDriverState.filename anymore.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Kevin Wolf 
---
 include/block/block.h |  2 +-
 block.c   | 17 ++---
 monitor.c |  5 -
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 7bcbd05338..3425e9fa79 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -432,7 +432,7 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
 int64_t *cluster_offset,
 unsigned int *cluster_bytes);
 
-const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
+char *bdrv_get_encrypted_filename(BlockDriverState *bs);
 void bdrv_get_backing_filename(BlockDriverState *bs,
char *filename, int filename_size);
 char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
diff --git a/block.c b/block.c
index c63fc1b2da..95015251d5 100644
--- a/block.c
+++ b/block.c
@@ -2858,10 +2858,12 @@ void bdrv_add_key(BlockDriverState *bs, const char 
*key, Error **errp)
 }
 } else {
 if (bdrv_key_required(bs)) {
+char *enc_filename = bdrv_get_encrypted_filename(bs);
 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
   "'%s' (%s) is encrypted",
   bdrv_get_device_or_node_name(bs),
-  bdrv_get_encrypted_filename(bs));
+  enc_filename ?: "");
+g_free(enc_filename);
 }
 }
 }
@@ -3111,14 +3113,15 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState 
*bs)
 return false;
 }
 
-const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
+char *bdrv_get_encrypted_filename(BlockDriverState *bs)
 {
-if (bs->backing && bs->backing->bs->encrypted)
-return bs->backing_file;
-else if (bs->encrypted)
-return bs->filename;
-else
+if (bs->backing && bs->backing->bs->encrypted) {
+return g_strdup(bs->backing_file);
+} else if (bs->encrypted) {
+return g_strdup(bs->filename);
+} else {
 return NULL;
+}
 }
 
 void bdrv_get_backing_filename(BlockDriverState *bs,
diff --git a/monitor.c b/monitor.c
index 0841d436b0..bb3000a2df 100644
--- a/monitor.c
+++ b/monitor.c
@@ -4056,10 +4056,13 @@ int monitor_read_bdrv_key_start(Monitor *mon, 
BlockDriverState *bs,
 BlockCompletionFunc *completion_cb,
 void *opaque)
 {
+char *enc_filename;
 int err;
 
+enc_filename = bdrv_get_encrypted_filename(bs);
 monitor_printf(mon, "%s (%s) is encrypted.\n", bdrv_get_device_name(bs),
-   bdrv_get_encrypted_filename(bs));
+   enc_filename ?: "");
+g_free(enc_filename);
 
 mon->password_completion_cb = completion_cb;
 mon->password_opaque = opaque;
-- 
2.11.0

[Qemu-devel] [PATCH v6 4/9] block: Do not blindly copy filename from file

2017-01-13 Thread Max Reitz

bdrv_refresh_filename() can do the same and it has some checks whether
the filename can actually be inherited or not, so we can let it do its
job in bdrv_open_inherit() after bdrv_open_common() has been called.

The only thing we need to set in bdrv_open_common() is the
exact_filename of a BDS without an underlying file, for two reasons:
(1) It cannot be inherited from an underlying file BDS, so it has to be
set somewhere.
(2) The driver may need the filename in its bdrv_file_open()
implementation (format drivers do not need their own filename,
though they may need their file BDS's name).

Signed-off-by: Max Reitz 
---
 block.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/block.c b/block.c
index 9943d8eff6..19f8a84d03 100644
--- a/block.c
+++ b/block.c
@@ -1116,12 +1116,11 @@ static int bdrv_open_common(BlockDriverState *bs, 
BdrvChild *file,
 bs->detect_zeroes = value;
 }
 
-if (filename != NULL) {
-pstrcpy(bs->filename, sizeof(bs->filename), filename);
+if (!file && filename) {
+pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), filename);
 } else {
-bs->filename[0] = '\0';
+assert(!drv->bdrv_needs_filename);
 }
-pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
 
 bs->drv = drv;
 bs->opaque = g_malloc0(drv->instance_size);
-- 
2.11.0

[Qemu-devel] [PATCH v7 3/7] trace: [tcg] Delay changes to dynamic state when translating

2017-01-13 Thread Lluís Vilanova

This keeps consistency across all decisions taken during translation
when the dynamic state of a vCPU is changed in the middle of translating
some guest code.

Signed-off-by: Lluís Vilanova 
---
 include/qom/cpu.h  |3 +++
 qom/cpu.c  |2 ++
 trace/control-target.c |   21 ++---
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 3f79a8e955..31c3e6018d 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -295,6 +295,8 @@ struct qemu_work_item;
  * @kvm_fd: vCPU file descriptor for KVM.
  * @work_mutex: Lock to prevent multiple access to queued_work_*.
  * @queued_work_first: First asynchronous work pending.
+ * @trace_dstate_delayed: Delayed changes to trace_dstate (includes all changes
+ *to @trace_dstate).
  * @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask).
  *
  * State of one CPU core or thread.
@@ -370,6 +372,7 @@ struct CPUState {
  * Dynamically allocated based on bitmap requried to hold up to
  * trace_get_vcpu_event_count() entries.
  */
+unsigned long *trace_dstate_delayed;
 unsigned long *trace_dstate;
 
 /* TODO Move common fields from CPUArchState here. */
diff --git a/qom/cpu.c b/qom/cpu.c
index 03d9190f8c..8e981ac6c7 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -367,6 +367,7 @@ static void cpu_common_initfn(Object *obj)
 QTAILQ_INIT(&cpu->breakpoints);
 QTAILQ_INIT(&cpu->watchpoints);
 
+cpu->trace_dstate_delayed = bitmap_new(trace_get_vcpu_event_count());
 cpu->trace_dstate = bitmap_new(trace_get_vcpu_event_count());
 
 cpu_exec_initfn(cpu);
@@ -375,6 +376,7 @@ static void cpu_common_initfn(Object *obj)
 static void cpu_common_finalize(Object *obj)
 {
 CPUState *cpu = CPU(obj);
+g_free(cpu->trace_dstate_delayed);
 g_free(cpu->trace_dstate);
 }
 
diff --git a/trace/control-target.c b/trace/control-target.c
index 7ebf6e0bcb..dba3b21bb0 100644
--- a/trace/control-target.c
+++ b/trace/control-target.c
@@ -1,13 +1,14 @@
 /*
  * Interface for configuring and controlling the state of tracing events.
  *
- * Copyright (C) 2014-2016 Lluís Vilanova 
+ * Copyright (C) 2014-2017 Lluís Vilanova 
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
  */
 
 #include "qemu/osdep.h"
+#include "qom/cpu.h"
 #include "cpu.h"
 #include "trace.h"
 #include "trace/control.h"
@@ -57,6 +58,13 @@ void trace_event_set_state_dynamic(TraceEvent *ev, bool 
state)
 }
 }
 
+static void trace_event_synchronize_vcpu_state_dynamic(
+CPUState *vcpu, run_on_cpu_data ignored)
+{
+bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed,
+trace_get_vcpu_event_count());
+}
+
 void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
 TraceEvent *ev, bool state)
 {
@@ -69,13 +77,20 @@ void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
 if (state_pre != state) {
 if (state) {
 trace_events_enabled_count++;
-set_bit(vcpu_id, vcpu->trace_dstate);
+set_bit(vcpu_id, vcpu->trace_dstate_delayed);
 (*ev->dstate)++;
 } else {
 trace_events_enabled_count--;
-clear_bit(vcpu_id, vcpu->trace_dstate);
+clear_bit(vcpu_id, vcpu->trace_dstate_delayed);
 (*ev->dstate)--;
 }
+/*
+ * Delay changes until next TB; we want all TBs to be built from a
+ * single set of dstate values to ensure consistency of generated
+ * tracing code.
+ */
+async_run_on_cpu(vcpu, trace_event_synchronize_vcpu_state_dynamic,
+ RUN_ON_CPU_NULL);
 }
 }

[Qemu-devel] [PATCH v6 5/9] block: Add bdrv_filename()

2017-01-13 Thread Max Reitz

Split the part which actually refreshes the BlockDriverState.filename
field off of bdrv_refresh_filename() into a more generic function
bdrv_filename(), which first calls bdrv_refresh_filename() and then
stores a qemu-usable filename in the given buffer instead of
BlockDriverState.filename.

Since bdrv_refresh_filename() therefore no longer refreshes that field,
all of the existing calls to that function have to be replaced by calls
to bdrv_filename() "manually" refreshing the BDS filename field (this is
only temporary).

Signed-off-by: Max Reitz 
---
 include/block/block.h |  1 +
 block.c   | 50 +-
 block/replication.c   |  2 +-
 blockdev.c|  3 ++-
 4 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 3425e9fa79..8abc3da69f 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -252,6 +252,7 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState 
*bs,
 const char *backing_file);
 int bdrv_get_backing_file_depth(BlockDriverState *bs);
 void bdrv_refresh_filename(BlockDriverState *bs);
+char *bdrv_filename(BlockDriverState *bs, char *dest, size_t sz);
 int bdrv_truncate(BlockDriverState *bs, int64_t offset);
 int64_t bdrv_nb_sectors(BlockDriverState *bs);
 int64_t bdrv_getlength(BlockDriverState *bs);
diff --git a/block.c b/block.c
index 19f8a84d03..a631d94702 100644
--- a/block.c
+++ b/block.c
@@ -1731,7 +1731,8 @@ static BlockDriverState 
*bdrv_append_temp_snapshot(BlockDriverState *bs,
 bdrv_append(bs_snapshot, bs);
 
 bs_snapshot->backing_overridden = true;
-bdrv_refresh_filename(bs_snapshot);
+bdrv_filename(bs_snapshot, bs_snapshot->filename,
+  sizeof(bs_snapshot->filename));
 
 g_free(tmp_filename);
 return bs_snapshot;
@@ -1923,7 +1924,7 @@ static BlockDriverState *bdrv_open_inherit(const char 
*filename,
 }
 }
 
-bdrv_refresh_filename(bs);
+bdrv_filename(bs, bs->filename, sizeof(bs->filename));
 
 /* Check if any unknown options were used */
 if (options && (qdict_size(options) != 0)) {
@@ -4101,9 +4102,6 @@ static bool append_significant_runtime_options(QDict *d, 
BlockDriverState *bs)
  *  - full_open_options: Options which, when given when opening a block device
  *   (without a filename), result in a BDS (mostly)
  *   equalling the given one
- *  - filename: If exact_filename is set, it is copied here. Otherwise,
- *  full_open_options is converted to a JSON object, prefixed with
- *  "json:" (for use through the JSON pseudo protocol) and put 
here.
  */
 void bdrv_refresh_filename(BlockDriverState *bs)
 {
@@ -4120,7 +4118,8 @@ void bdrv_refresh_filename(BlockDriverState *bs)
 /* This BDS's file name may depend on any of its children's file names, so
  * refresh those first */
 QLIST_FOREACH(child, &bs->children, next) {
-bdrv_refresh_filename(child->bs);
+bdrv_filename(child->bs, child->bs->filename,
+  sizeof(child->bs->filename));
 
 if (child->role == &child_backing && child->bs->backing_overridden) {
 bs->backing_overridden = true;
@@ -4184,15 +4183,48 @@ void bdrv_refresh_filename(BlockDriverState *bs)
 strcpy(bs->exact_filename, bs->file->bs->exact_filename);
 }
 }
+}
+
+/* First refreshes exact_filename and full_open_options by calling
+ * bdrv_refresh_filename(). Then, if exact_filename is set, it is copied into
+ * the target buffer. Otherwise, full_open_options is converted to a JSON
+ * object, prefixed with "json:" (for use through the JSON pseudo protocol) and
+ * put there.
+ *
+ * If @dest is not NULL, the filename will be truncated to @sz - 1 bytes and
+ * placed there. If @sz > 0, it will always be null-terminated.
+ *
+ * If @dest is NULL, @sz is ignored and a new buffer will be allocated which is
+ * large enough to hold the filename and the trailing '\0'. This buffer is then
+ * returned and has to be freed by the caller when it is no longer needed.
+ *
+ * Returns @dest if it is not NULL, and the newly allocated buffer otherwise.
+ */
+char *bdrv_filename(BlockDriverState *bs, char *dest, size_t sz)
+{
+bdrv_refresh_filename(bs);
+
+if (sz > INT_MAX) {
+sz = INT_MAX;
+}
 
 if (bs->exact_filename[0]) {
-pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
+if (dest) {
+pstrcpy(dest, sz, bs->exact_filename);
+} else {
+dest = g_strdup(bs->exact_filename);
+}
 } else {
 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
-snprintf(bs->filename, sizeof(bs->filename), "json:%s",
- qstring_get_str(json));
+if (dest) {
+snprintf(dest, sz, "json:%s", qstring_get_str(json));
+} else {
+dest = g_strdup_printf("json:%s", qstring_g

[Qemu-devel] [PATCH v7 6/7] trace: [tcg, trivial] Re-align generated code

2017-01-13 Thread Lluís Vilanova

Last patch removed a nesting level in generated code. Re-align all code
generated by backends to be 4-column aligned.

Signed-off-by: Lluís Vilanova 
---
 scripts/tracetool/backend/dtrace.py |4 ++--
 scripts/tracetool/backend/ftrace.py |   20 ++--
 scripts/tracetool/backend/log.py|   19 ++-
 scripts/tracetool/backend/simple.py |4 ++--
 scripts/tracetool/backend/syslog.py |6 +++---
 scripts/tracetool/backend/ust.py|4 ++--
 6 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/scripts/tracetool/backend/dtrace.py 
b/scripts/tracetool/backend/dtrace.py
index 79505c6b1a..c29ffb4fa0 100644
--- a/scripts/tracetool/backend/dtrace.py
+++ b/scripts/tracetool/backend/dtrace.py
@@ -6,7 +6,7 @@ DTrace/SystemTAP backend.
 """
 
 __author__ = "Lluís Vilanova "
-__copyright__  = "Copyright 2012-2016, Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
 __license__= "GPL version 2 or (at your option) any later version"
 
 __maintainer__ = "Stefan Hajnoczi"
@@ -41,6 +41,6 @@ def generate_h_begin(events, group):
 
 
 def generate_h(event, group):
-out('QEMU_%(uppername)s(%(argnames)s);',
+out('QEMU_%(uppername)s(%(argnames)s);',
 uppername=event.name.upper(),
 argnames=", ".join(event.args.names()))
diff --git a/scripts/tracetool/backend/ftrace.py 
b/scripts/tracetool/backend/ftrace.py
index db9fe7ad57..dd0eda4441 100644
--- a/scripts/tracetool/backend/ftrace.py
+++ b/scripts/tracetool/backend/ftrace.py
@@ -29,17 +29,17 @@ def generate_h(event, group):
 if len(event.args) > 0:
 argnames = ", " + argnames
 
-out('{',
-'char ftrace_buf[MAX_TRACE_STRLEN];',
-'int unused __attribute__ ((unused));',
-'int trlen;',
-'if (trace_event_get_state(%(event_id)s)) {',
-'trlen = snprintf(ftrace_buf, MAX_TRACE_STRLEN,',
-' "%(name)s " %(fmt)s "\\n" 
%(argnames)s);',
-'trlen = MIN(trlen, MAX_TRACE_STRLEN - 1);',
-'unused = write(trace_marker_fd, ftrace_buf, trlen);',
-'}',
+out('{',
+'char ftrace_buf[MAX_TRACE_STRLEN];',
+'int unused __attribute__ ((unused));',
+'int trlen;',
+'if (trace_event_get_state(%(event_id)s)) {',
+'trlen = snprintf(ftrace_buf, MAX_TRACE_STRLEN,',
+' "%(name)s " %(fmt)s "\\n" 
%(argnames)s);',
+'trlen = MIN(trlen, MAX_TRACE_STRLEN - 1);',
+'unused = write(trace_marker_fd, ftrace_buf, trlen);',
 '}',
+'}',
 name=event.name,
 args=event.args,
 event_id="TRACE_" + event.name.upper(),
diff --git a/scripts/tracetool/backend/log.py b/scripts/tracetool/backend/log.py
index 4f4a4d38b1..54f0a69886 100644
--- a/scripts/tracetool/backend/log.py
+++ b/scripts/tracetool/backend/log.py
@@ -6,7 +6,7 @@ Stderr built-in backend.
 """
 
 __author__ = "Lluís Vilanova "
-__copyright__  = "Copyright 2012-2016, Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
 __license__= "GPL version 2 or (at your option) any later version"
 
 __maintainer__ = "Stefan Hajnoczi"
@@ -35,14 +35,15 @@ def generate_h(event, group):
 else:
 cond = "trace_event_get_state(%s)" % ("TRACE_" + event.name.upper())
 
-out('if (%(cond)s) {',
-'struct timeval _now;',
-'gettimeofday(&_now, NULL);',
-'qemu_log_mask(LOG_TRACE, "%%d@%%zd.%%06zd:%(name)s " 
%(fmt)s "\\n",',
-'  getpid(),',
-'  (size_t)_now.tv_sec, (size_t)_now.tv_usec',
-'  %(argnames)s);',
-'}',
+out('if (%(cond)s) {',
+'struct timeval _now;',
+'gettimeofday(&_now, NULL);',
+'qemu_log_mask(LOG_TRACE,',
+'  "%%d@%%zd.%%06zd:%(name)s " %(fmt)s "\\n",',
+'  getpid(),',
+'  (size_t)_now.tv_sec, (size_t)_now.tv_usec',
+'  %(argnames)s);',
+'}',
 cond=cond,
 name=event.name,
 fmt=event.fmt.rstrip("\n"),
diff --git a/scripts/tracetool/backend/simple.py 
b/scripts/tracetool/backend/simple.py
index 85f61028e2..9554673cb9 100644
--- a/scripts/tracetool/backend/simple.py
+++ b/scripts/tracetool/backend/simple.py
@@ -6,7 +6,7 @@ Simple built-in backend.
 """
 
 __author__ = "Lluís Vilanova "
-__copyright__  = "Copyright 2012-2014, Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
 __license__= "GPL version 2 or (at your option) any later version"
 
 __maintainer__ = "Stefan Hajnoczi

[Qemu-devel] [PATCH v7 7/7] trace: [trivial] Statically enable all guest events

2017-01-13 Thread Lluís Vilanova

The optimizations of this series makes it feasible to have them
available on all builds.

Signed-off-by: Lluís Vilanova 
---
 trace-events |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/trace-events b/trace-events
index f74e1d3d22..0a0f4d9cd6 100644
--- a/trace-events
+++ b/trace-events
@@ -159,7 +159,7 @@ vcpu guest_cpu_reset(void)
 #
 # Mode: user, softmmu
 # Targets: TCG(all)
-disable vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", 
"vaddr=0x%016"PRIx64" info=%d"
+vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", 
"vaddr=0x%016"PRIx64" info=%d"
 
 # @num: System call number.
 # @arg*: System call argument value.
@@ -168,7 +168,7 @@ disable vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) 
"info=%d", "vaddr=0x
 #
 # Mode: user
 # Targets: TCG(all)
-disable vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, 
uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7, 
uint64_t arg8) "num=0x%016"PRIx64" arg1=0x%016"PRIx64" arg2=0x%016"PRIx64" 
arg3=0x%016"PRIx64" arg4=0x%016"PRIx64" arg5=0x%016"PRIx64" arg6=0x%016"PRIx64" 
arg7=0x%016"PRIx64" arg8=0x%016"PRIx64
+vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, uint64_t 
arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7, uint64_t 
arg8) "num=0x%016"PRIx64" arg1=0x%016"PRIx64" arg2=0x%016"PRIx64" 
arg3=0x%016"PRIx64" arg4=0x%016"PRIx64" arg5=0x%016"PRIx64" arg6=0x%016"PRIx64" 
arg7=0x%016"PRIx64" arg8=0x%016"PRIx64
 
 # @num: System call number.
 # @ret: System call result value.
@@ -177,4 +177,4 @@ disable vcpu guest_user_syscall(uint64_t num, uint64_t 
arg1, uint64_t arg2, uint
 #
 # Mode: user
 # Targets: TCG(all)
-disable vcpu guest_user_syscall_ret(uint64_t num, uint64_t ret) 
"num=0x%016"PRIx64" ret=0x%016"PRIx64
+vcpu guest_user_syscall_ret(uint64_t num, uint64_t ret) "num=0x%016"PRIx64" 
ret=0x%016"PRIx64

[Qemu-devel] [PATCH v6 1/9] block: Always set *file in get_block_status

2017-01-13 Thread Max Reitz

*file should always be set (to NULL, if nothing else) instead of leaving
it dangling sometimes. This should also be documented so callers can
rely on this behavior.

Signed-off-by: Max Reitz 
---
 block/io.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index 4f005623f7..ff693d7f73 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1709,7 +1709,8 @@ typedef struct BdrvCoGetBlockStatusData {
  * beyond the end of the disk image it will be clamped.
  *
  * If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, 'file'
- * points to the BDS which the sector range is allocated in.
+ * points to the BDS which the sector range is allocated in. If the block 
driver
+ * does not set 'file', it will be set to NULL.
  */
 static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
  int64_t sector_num,
@@ -1720,6 +1721,8 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status(BlockDriverState *bs,
 int64_t n;
 int64_t ret, ret2;
 
+*file = NULL;
+
 total_sectors = bdrv_nb_sectors(bs);
 if (total_sectors < 0) {
 return total_sectors;
@@ -1744,7 +1747,6 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status(BlockDriverState *bs,
 return ret;
 }
 
-*file = NULL;
 bdrv_inc_in_flight(bs);
 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum,
 file);
-- 
2.11.0

[Qemu-devel] [PATCH v6 0/9] block: Drop BDS.filename

2017-01-13 Thread Max Reitz

*** This series is based on v4 of my ***
*** "block: Fix some filename generation issues" series ***


The BDS filename field is generally only used when opening disk images
or emitting error or warning messages, the only exception to this rule
is the map command of qemu-img. However, using exact_filename there
instead should not be a problem. Therefore, we can drop the filename
field from the BlockDriverState and use a function instead which builds
the filename from scratch when called.

This is slower than reading a static char array but the problem of that
static array is that it may become obsolete due to changes in any
BlockDriverState or in the BDS graph. Using a function which rebuilds
the filename every time it is called resolves this problem.

The disadvantage of worse performance is negligible, on the other hand.
After patch 3 of this series, which replaces some queries of
BDS.filename by reads from somewhere else (mostly BDS.exact_filename),
the filename field is only used when a disk image is opened or some
message should be emitted, both of which cases do not suffer from the
performance hit.


http://lists.nongnu.org/archive/html/qemu-devel/2015-06/msg07025.html
gives an example of how to achieve an outdated filename, so we do need
this series. We can technically fix it differently (by appropriately
invoking bdrv_refresh_filename()), but that is pretty difficult™. I find
this series simpler and it it's something we wanted to do anyway.

Regarding the fear that this might change current behavior, especially
for libvirt which used filenames to identify nodes in the BDS graph:
Since bdrv_open() already calls bdrv_refresh_filename() today, and
apparently everything works fine, this series will most likely not break
anything. The only thing that will change is if the BDS graph is
dynamically reconfigured at runtime, and in that case it's most likely a
bug fix. Also, I don't think libvirt supports such cases already.


tl;dr: This series is a bug fix and I don't think it'll break legacy
management applications relying on the filename to identify a BDS; as
long as they are not trying to continue that practice with more advanced
configurations (e.g. with Quorum).


v6:
- Rebased after more than a year
- Patch 1 is newly required for patch 6
- Patch 3: Rebase conflicts
- Patch 4: Related bug fix; fits well into this series because it
   requires that format drivers do not query their
   bs->exact_filename, and this series can ensure exactly that
   (and after patch 3, we can be pretty sure this is the case)
- Patch 5: Rebase conflicts, improved a comment, noticed that I'm now
   replacing all of the existing bdrv_refresh_filename() calls
   and changed the commit message accordingly
- Patch 6: Rewritten because the affected code has been rewritten in the
   meantime.
- Patch 7: Rebase conflicts
- Patch 8: Added in this version. It makes the series a bit more
   rigorous, and I think that's good.
- Patch 9: Rebase conflict


git-backport-diff against v5:

Key:
[] : patches are identical
[] : number of functional differences between upstream/downstream patch
[down] : patch is downstream-only
The flags [FC] indicate (F)unctional and (C)ontextual differences, respectively

001/9:[down] 'block: Always set *file in get_block_status'
002/9:[] [-C] 'block: Change bdrv_get_encrypted_filename()'
003/9:[0011] [FC] 'block: Avoid BlockDriverState.filename'
004/9:[down] 'block: Do not blindly copy filename from file'
005/9:[0020] [FC] 'block: Add bdrv_filename()'
006/9:[0041] [FC] 'qemu-img: Use bdrv_filename() for map'
007/9:[0072] [FC] 'block: Drop BlockDriverState.filename'
008/9:[down] 'block: Complete move to pull filename updates'
009/9:[0003] [FC] 'iotests: Test changed Quorum filename'


Max Reitz (9):
  block: Always set *file in get_block_status
  block: Change bdrv_get_encrypted_filename()
  block: Avoid BlockDriverState.filename
  block: Do not blindly copy filename from file
  block: Add bdrv_filename()
  qemu-img: Use bdrv_filename() for map
  block: Drop BlockDriverState.filename
  block: Complete move to pull filename updates
  iotests: Test changed Quorum filename

 include/block/block.h |  3 +-
 include/block/block_int.h | 13 ++-
 block.c   | 96 ++-
 block/commit.c|  4 +-
 block/file-posix.c|  6 +--
 block/file-win32.c|  4 +-
 block/gluster.c   |  3 +-
 block/io.c|  6 ++-
 block/mirror.c| 16 ++--
 block/nbd.c   |  5 ++-
 block/nfs.c   |  4 +-
 block/qapi.c  |  4 +-
 block/raw-format.c|  4 +-
 block/replication.c   |  2 -
 block/vhdx-log.c  |  7 +++-
 block/vmdk.c  | 24 +---
 block/vpc.c   |  7 +++-
 blockdev.c| 33 +++-
 monitor.c |  5 ++-
 qemu-img.c| 3

[Qemu-devel] [PATCH v6 3/9] block: Avoid BlockDriverState.filename

2017-01-13 Thread Max Reitz

In places which directly pass a filename to the OS, we should not use
the filename field at all but exact_filename instead (although the
former currently equals the latter if that is set).

In raw_open_common(), we do not need to access BDS.filename because we
already have a local variable pointing to the filename.

Signed-off-by: Max Reitz 
---
 block.c| 5 +++--
 block/file-posix.c | 6 +++---
 block/file-win32.c | 4 ++--
 block/gluster.c| 3 ++-
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/block.c b/block.c
index 95015251d5..9943d8eff6 100644
--- a/block.c
+++ b/block.c
@@ -1146,8 +1146,9 @@ static int bdrv_open_common(BlockDriverState *bs, 
BdrvChild *file,
 if (ret < 0) {
 if (local_err) {
 error_propagate(errp, local_err);
-} else if (bs->filename[0]) {
-error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
+} else if (bs->exact_filename[0]) {
+error_setg_errno(errp, -ret, "Could not open '%s'",
+ bs->exact_filename);
 } else {
 error_setg_errno(errp, -ret, "Could not open image");
 }
diff --git a/block/file-posix.c b/block/file-posix.c
index 28b47d977b..2e3acd622d 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -590,7 +590,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
 
 /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
 if (rs->fd == -1) {
-const char *normalized_filename = state->bs->filename;
+const char *normalized_filename = state->bs->exact_filename;
 ret = raw_normalize_devicepath(&normalized_filename);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "Could not normalize device path");
@@ -2075,7 +2075,7 @@ static bool hdev_is_sg(BlockDriverState *bs)
 int sg_version;
 int ret;
 
-if (stat(bs->filename, &st) < 0 || !S_ISCHR(st.st_mode)) {
+if (stat(bs->exact_filename, &st) < 0 || !S_ISCHR(st.st_mode)) {
 return false;
 }
 
@@ -2510,7 +2510,7 @@ static int cdrom_reopen(BlockDriverState *bs)
  */
 if (s->fd >= 0)
 qemu_close(s->fd);
-fd = qemu_open(bs->filename, s->open_flags, 0644);
+fd = qemu_open(bs->exact_filename, s->open_flags, 0644);
 if (fd < 0) {
 s->fd = -1;
 return -EIO;
diff --git a/block/file-win32.c b/block/file-win32.c
index 800fabdd72..040c71830e 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -457,7 +457,7 @@ static void raw_close(BlockDriverState *bs)
 
 CloseHandle(s->hfile);
 if (bs->open_flags & BDRV_O_TEMPORARY) {
-unlink(bs->filename);
+unlink(bs->exact_filename);
 }
 }
 
@@ -525,7 +525,7 @@ static int64_t raw_get_allocated_file_size(BlockDriverState 
*bs)
   DWORD * high);
 get_compressed_t get_compressed;
 struct _stati64 st;
-const char *filename = bs->filename;
+const char *filename = bs->exact_filename;
 /* WinNT support GetCompressedFileSize to determine allocate size */
 get_compressed =
 (get_compressed_t) GetProcAddress(GetModuleHandle("kernel32"),
diff --git a/block/gluster.c b/block/gluster.c
index ded13fb5fb..f6c2484c11 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -878,7 +878,8 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState 
*state,
 gconf->has_debug = true;
 gconf->logfile = g_strdup(s->logfile);
 gconf->has_logfile = true;
-reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp);
+reop_s->glfs = qemu_gluster_init(gconf, state->bs->exact_filename, NULL,
+ errp);
 if (reop_s->glfs == NULL) {
 ret = -errno;
 goto exit;
-- 
2.11.0

Re: [Qemu-devel] [PATCH] libvhost-user: Start VQs on SET_VRING_CALL

2017-01-13 Thread Felipe Franciosi


> On 13 Jan 2017, at 10:18, Michael S. Tsirkin  wrote:
> 
> On Fri, Jan 13, 2017 at 05:15:22PM +, Felipe Franciosi wrote:
>> 
>>> On 13 Jan 2017, at 09:04, Michael S. Tsirkin  wrote:
>>> 
>>> On Fri, Jan 13, 2017 at 03:09:46PM +, Felipe Franciosi wrote:
 Hi Marc-Andre,
 
> On 13 Jan 2017, at 07:03, Marc-André Lureau  wrote:
> 
> Hi
> 
> - Original Message -
>> Currently, VQs are started as soon as a SET_VRING_KICK is received. That
>> is too early in the VQ setup process, as the backend might not yet have
> 
> I think we may want to reconsider queue_set_started(), move it elsewhere, 
> since kick/call fds aren't mandatory to process the rings.
 
 Hmm. The fds aren't mandatory, but I imagine in that case we should still 
 receive SET_VRING_KICK/CALL messages without an fd (ie. with the 
 VHOST_MSG_VQ_NOFD_MASK flag set). Wouldn't that be the case?
>>> 
>>> Please look at docs/specs/vhost-user.txt, Starting and stopping rings
>>> 
>>> The spec says:
>>> Client must start ring upon receiving a kick (that is, detecting that
>>> file descriptor is readable) on the descriptor specified by
>>> VHOST_USER_SET_VRING_KICK, and stop ring upon receiving
>>> VHOST_USER_GET_VRING_BASE.
>> 
>> Yes I have seen the spec, but there is a race with the current libvhost-user 
>> code which needs attention. My initial proposal (which got turned down) was 
>> to send a spurious notification upon seeing a callfd. Then I came up with 
>> this proposal. See below.
>> 
>>> 
>>> 
> 
>> a callfd to notify in case it received a kick and fully processed the
>> request/command. This patch only starts a VQ when a SET_VRING_CALL is
>> received.
> 
> I don't like that much, as soon as the kick fd is received, it should 
> start polling it imho. callfd is optional, it may have one and not the 
> other.
 
 So the question is whether we should be receiving a SET_VRING_CALL anyway 
 or not, regardless of an fd being sent. (I think we do, but I haven't done 
 extensive testing with other device types.)
>>> 
>>> I would say not, only KICK is mandatory and that is also not enough
>>> to process ring. You must wait for it to be readable.
>> 
>> The problem is that Qemu takes time between sending the kickfd and the 
>> callfd. Hence the race. Consider this scenario:
>> 
>> 1) Guest configures the device
>> 2) Guest put a request on a virtq
>> 3) Guest kicks
>> 4) Qemu starts configuring the backend
>> 4.a) Qemu sends the masked callfds
>> 4.b) Qemu sends the virtq sizes and addresses
>> 4.c) Qemu sends the kickfds
>> 
>> (When using MQ, Qemu will only send the callfd once all VQs are configured)
>> 
>> 5) The backend starts listening on the kickfd upon receiving it
>> 6) The backend picks up the guest's request
>> 7) The backend processes the request
>> 8) The backend puts the response on the used ring
>> 9) The backend notifies the masked callfd
>> 
>> 4.d) Qemu sends the callfds
>> 
>> At which point the guest missed the notification and gets stuck.
>> 
>> Perhaps you prefer my initial proposal of sending a spurious notification 
>> when the backend sees a callfd?
>> 
>> Felipe
> 
> I thought we read the masked callfd when we unmask it,
> and forward the interrupt. See kvm_irqfd_assign:
> 
>/*
> * Check if there was an event already pending on the eventfd
> * before we registered, and trigger it as if we didn't miss it.
> */
>events = f.file->f_op->poll(f.file, &irqfd->pt);
> 
>if (events & POLLIN)
>schedule_work(&irqfd->inject);
> 
> 
> 
> Is this a problem you observe in practice?

Thanks for pointing out to this code; I wasn't aware of it.

Indeed I'm encountering it in practice. And I've checked that my kernel has the 
code above.

Starts to sound like a race:
Qemu registers the new notifier with kvm
Backend kicks the (now no longer registered) maskfd
Qemu sends the new callfd to the application

It's not hard to repro. How could this situation be avoided?

Cheers,
Felipe


> 
>> 
>>> 
> 
> Perhaps it's best for now to delay the callfd notification with a flag 
> until it is received?
 
 The other idea is to always kick when we receive the callfd. I remember 
 discussing that alternative with you before libvhost-user went in. The 
 protocol says both the driver and the backend must handle spurious kicks. 
 This approach also fixes the bug.
 
 I'm happy with whatever alternative you want, as long it makes 
 libvhost-user usable for storage devices.
 
 Thanks,
 Felipe
 
 
> 
> 
>> Signed-off-by: Felipe Franciosi 
>> ---
>> contrib/libvhost-user/libvhost-user.c | 26 +-
>> 1 file changed, 13 insertions(+), 13 deletions(-)
>> 
>> diff --git a/contrib/libvhost-user/libvhost-user.c
>> b/contrib/libvhost-user/libvhost-user.c

Re: [Qemu-devel] [PATCH] virtio_crypto: header update

2017-01-13 Thread Gonglei (Arei)


> -Original Message-
> From: Michael S. Tsirkin [mailto:m...@redhat.com]
> Sent: Saturday, January 14, 2017 12:18 AM
> To: qemu-devel@nongnu.org
> Cc: Gonglei (Arei)
> Subject: [PATCH] virtio_crypto: header update
> 
> Update header from latest linux driver.  Session creation structs gain
> padding to make them same size. Formatting cleanups.
> 
> Signed-off-by: Michael S. Tsirkin 
> ---
>  include/standard-headers/linux/virtio_crypto.h | 481 
> +
>  1 file changed, 251 insertions(+), 230 deletions(-)
> 
Tested-by: Gonglei 
Reviewed-by: Gonglei

Re: [Qemu-devel] [virtio-dev] Re: [PATCH v15 0/2] virtio-crypto: virtio crypto device specification

2017-01-13 Thread Gonglei (Arei)

> 
> On Fri, Jan 13, 2017 at 02:54:29AM +, Gonglei (Arei) wrote:
> >
> > >
> > > On Thu, Jan 12, 2017 at 12:26:24PM +, Gonglei (Arei) wrote:
> > > > Hi,
> > > >
> > > >
> > > > >
> > > > > On 01/04/2017 11:10 AM, Gonglei (Arei) wrote:
> > > > > > Hi all,
> > > > > >
> > > > > > I attach the diff files between v14 and v15 for better review.
> > > > > >
> > > > > Hi,
> > > > >
> > > > > only had a quick look. Will try to come back to this later.
> > > > >
> > > > That's cool.
> > > >
> > > > > > diff --git a/virtio-crypto.tex b/virtio-crypto.tex
> > > > > > index 9f7faf0..884ee95 100644
> > > > > > --- a/virtio-crypto.tex
> > > > > > +++ b/virtio-crypto.tex
> > > > > > @@ -2,8 +2,8 @@
> > > > > >
> > > > > >  The virtio crypto device is a virtual cryptography device as well 
> > > > > > as a
> kind
> > > of
> > > > > >  virtual hardware accelerator for virtual machines. The encryption
> and
> > > > > > -decryption requests are placed in the data queue and are ultimately
> > > handled
> > > > > by the
> > > > > > -backend crypto accelerators. The second queue is the control queue
> used
> > > to
> > > > > create
> > > > > > +decryption requests are placed in any of the data active queues and
> are
> > > > > ultimately handled by the
> > > > > s/data active/active data/
> > > > > > +backend crypto accelerators. The second kind of queue is the 
> > > > > > control
> > > queue
> > > > > used to create
> > > > > >  or destroy sessions for symmetric algorithms and will control some
> > > > > advanced
> > > > > >  features in the future. The virtio crypto device provides the 
> > > > > > following
> > > crypto
> > > > > >  services: CIPHER, MAC, HASH, and AEAD.
> > > > >
> > > > > [..]
> > > > >
> > > > > > ===The below diff shows the changes of add
> non-session
> > > mode
> > > > > support:
> > > > > >
> > > > > > diff --git a/virtio-crypto.tex b/virtio-crypto.tex
> > > > > > index 884ee95..44819f9 100644
> > > > > > --- a/virtio-crypto.tex
> > > > > > +++ b/virtio-crypto.tex
> > > > > > @@ -26,7 +26,10 @@ N is set by \field{max_dataqueues}.
> > > > > >
> > > > > >  \subsection{Feature bits}\label{sec:Device Types / Crypto Device /
> > > Feature
> > > > > bits}
> > > > > >
> > > > > > -None currently defined.
> > > > > > +VIRTIO_CRYPTO_F_CIPHER_SESSION_MODE (1) Session mode is
> > > available
> > > > > for CIPHER service.
> > > > > > +VIRTIO_CRYPTO_F_HASH_SESSION_MODE (2) Session mode is
> available
> > > for
> > > > > HASH service.
> > > > > > +VIRTIO_CRYPTO_F_MAC_SESSION_MODE (3) Session mode is
> available
> > > for
> > > > > MAC service.
> > > > > > +VIRTIO_CRYPTO_F_AEAD_SESSION_MODE (4) Session mode is
> available
> > > for
> > > > > AEAD service.
> > > > > >
> > > > > >  \subsection{Device configuration layout}\label{sec:Device Types /
> > > Crypto
> > > > > Device / Device configuration layout}
> > > > > >
> > > > > > @@ -208,6 +211,9 @@ Operation parameters are algorithm-specific
> > > > > parameters, output data is the
> > > > > >  data that should be utilized in operations, and input data is 
> > > > > > equal to
> > > > > >  "operation result + result data".
> > > > > >
> > > > > > +The device can support both session mode (See \ref{sec:Device Types
> /
> > > > > Crypto Device / Device Operation / Control Virtqueue / Session
> operation})
> > > and
> > > > > non-session mode, for example,
> > > > > > +As VIRTIO_CRYPTO_F_CIPHER_SESSION feature bit is negotiated,
> the
> > > driver
> > > > > can use session mode for CIPHER service, otherwise it can only use
> > > non-session
> > > > > mode.
> > > > > > +
> > > > >
> > > > > As far as I understand you are adding non-session mode to the mix but
> > > > > providing feature bits for session mode. Would this render the the
> current
> > > > > implementation non-compliant?
> > > > >
> > > > You are right, shall we use feature bits for non-session mode for
> compliancy?
> > > > Or because the spec is on the fly, and some structures in the
> virtio_crypto.h
> > > need to
> > > > be modified, can we keep the compliancy completely?
> > > >
> > > > Thanks,
> > > > -Gonglei
> > >
> > > Since there's a linux driver upstream you must at least
> > > keep compatibility with that.
> > >
> > Sure. We use feature bits for non-session mode then.
> > For structures modification, do we need to do some specific
> > actions for compatibility?  Take CIPHER service as an example:
> >
> > The present structure:
> >
> > struct virtio_crypto_cipher_para {
> > /*
> >  * Byte Length of valid IV/Counter data pointed to by the below iv data.
> >  *
> >  * For block ciphers in CBC or F8 mode, or for Kasumi in F8 mode, or for
> >  *   SNOW3G in UEA2 mode, this is the length of the IV (which
> >  *   must be the same as the block length of the cipher).
> >  * For block ciphers in CTR mode, this is the length of the counter
> >  *   (which must be the same as the block length of the cipher).
> >  */
> > le32 iv_len;
> > /* leng

Re: [Qemu-devel] [PATCH RFC v3 00/14] VT-d: vfio enablement and misc enhances

2017-01-13 Thread Peter Xu

On Fri, Jan 13, 2017 at 05:58:02PM +0200, Michael S. Tsirkin wrote:
> On Fri, Jan 13, 2017 at 11:06:26AM +0800, Peter Xu wrote:
> > v3:
> > - fix style error reported by patchew
> > - fix comment in domain switch patch: use "IOMMU address space" rather
> >   than "IOMMU region" [Kevin]
> > - add ack-by for Paolo in patch:
> >   "memory: add section range info for IOMMU notifier"
> >   (this is seperately collected besides this thread)
> > - remove 3 patches which are merged already (from Jason)
> > - rebase to master b6c0897
> 
> So 1-6 look like nice cleanups to me. Should I merge them now?

That'll be great if you'd like to merge them. Then I can further
shorten this series for the next post.

Regarding to the error_report() issue that Jason has mentioned, I can
touch them up in the future when needed - after all, most of the patch
content are about converting DPRINT()s into traces.

Thanks!

-- peterx

Re: [Qemu-devel] [PATCH v7 2/7] trace: Make trace_get_vcpu_event_count() inlinable

2017-01-13 Thread Richard Henderson


On 01/13/2017 12:48 PM, Lluís Vilanova wrote:

uring and controlling the state of tracing events.
  *
- * Copyright (C) 2011-2016 Lluís Vilanova 
+ * Copyright (C) 2011-2017 Lluís Vilanova 
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -237,7 +237,7 @@ char *trace_opt_parse(const char *optarg);
  *
  * Return the number of known vcpu-specific events
  */
-uint32_t trace_get_vcpu_event_count(void);
+static uint32_t trace_get_vcpu_event_count(void);



Why is this declaration still here?  It's redundant with the inline.


r~

Re: [Qemu-devel] [PATCH v7 3/7] trace: [tcg] Delay changes to dynamic state when translating

2017-01-13 Thread Richard Henderson


On 01/13/2017 12:48 PM, Lluís Vilanova wrote:

This keeps consistency across all decisions taken during translation
when the dynamic state of a vCPU is changed in the middle of translating
some guest code.

Signed-off-by: Lluís Vilanova 
---
 include/qom/cpu.h  |3 +++
 qom/cpu.c  |2 ++
 trace/control-target.c |   21 ++---
 3 files changed, 23 insertions(+), 3 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 0/2] virtio: revert virtio_queue_set_notification() nesting

2017-01-13 Thread Richard Henderson


On 01/12/2017 03:46 AM, Stefan Hajnoczi wrote:

The virtio_queue_set_notification() nesting introduced for AioContext polling
raised an assertion with virtio-net (even in non-polling mode).  Converting
virtio-net and virtio-crypto to use virtio_queue_set_notification() in a
nesting fashion would be invasive and isn't worth it.

Patch 1 contains the revert to resolve the bug that Doug noticed.

Patch 2 is a less efficient but safe alternative.

Stefan Hajnoczi (2):
  Revert "virtio: turn vq->notification into a nested counter"
  virtio: disable notifications again after poll succeeded

 hw/virtio/virtio.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)


Tested-by: Richard Henderson 

This problem affected Alpha as well.  I tested the two patches together.


r~

[Qemu-devel] [PATCH] cryptodev: setiv only when really need

2017-01-13 Thread Longpeng(Mike)

ECB mode cipher doesn't need IV, if we setiv for it then qemu
crypto API would report "Expected IV size 0 not **", so we should
setiv only when the cipher algos really need.

Signed-off-by: Longpeng(Mike) 
---
 backends/cryptodev-builtin.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/backends/cryptodev-builtin.c b/backends/cryptodev-builtin.c
index 82a068e..b24a299 100644
--- a/backends/cryptodev-builtin.c
+++ b/backends/cryptodev-builtin.c
@@ -320,10 +320,12 @@ static int cryptodev_builtin_sym_operation(
 
 sess = builtin->sessions[op_info->session_id];
 
-ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv,
-   op_info->iv_len, errp);
-if (ret < 0) {
-return -VIRTIO_CRYPTO_ERR;
+if (op_info->iv_len > 0) {
+ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv,
+   op_info->iv_len, errp);
+if (ret < 0) {
+return -VIRTIO_CRYPTO_ERR;
+}
 }
 
 if (sess->direction == VIRTIO_CRYPTO_OP_ENCRYPT) {
-- 
1.8.3.1

[Qemu-devel] [PATCH] hw/core/null-machine: Add the possibility to instantiate a CPU, RAM and kernel

2017-01-13 Thread Thomas Huth

Sometimes it is useful to have just a machine with CPU and RAM, without
any further hardware in it, e.g. if you just want to do some instruction
debugging for TCG with a remote GDB attached to QEMU, or run some embedded
code with the "-semihosting" QEMU parameter. qemu-system-m68k already
features a "dummy" machine, and xtensa a "sim" machine for exactly this
purpose.
All target architectures have nowadays also a "none" machine, which would
be a perfect match for this, too - but it currently does not allow to add
CPU, RAM or a kernel yet. Thus let's add these possibilities in a generic
way to the "none" machine, too, so that we hopefully do not need additional
"dummy" machines in the future anymore (and maybe can also get rid of the
already existing "dummy"/"sim" machines one day).
Note that the default behaviour of the "none" machine is not changed, i.e.
no CPU and no RAM is instantiated by default. You've explicitely got to
specify the CPU model with "-cpu" and the amount of RAM with "-m" to get
these new features.

Signed-off-by: Thomas Huth 
---
 hw/core/Makefile.objs  |  2 +-
 hw/core/null-machine.c | 81 +-
 2 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
index a4c94e5..0b6c0f1 100644
--- a/hw/core/Makefile.objs
+++ b/hw/core/Makefile.objs
@@ -12,7 +12,6 @@ common-obj-$(CONFIG_XILINX_AXI) += stream.o
 common-obj-$(CONFIG_PTIMER) += ptimer.o
 common-obj-$(CONFIG_SOFTMMU) += sysbus.o
 common-obj-$(CONFIG_SOFTMMU) += machine.o
-common-obj-$(CONFIG_SOFTMMU) += null-machine.o
 common-obj-$(CONFIG_SOFTMMU) += loader.o
 common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o
 common-obj-$(CONFIG_SOFTMMU) += register.o
@@ -20,3 +19,4 @@ common-obj-$(CONFIG_SOFTMMU) += or-irq.o
 common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o
 
 obj-$(CONFIG_SOFTMMU) += generic-loader.o
+obj-$(CONFIG_SOFTMMU) += null-machine.o
diff --git a/hw/core/null-machine.c b/hw/core/null-machine.c
index 0351ba7..b2468ed 100644
--- a/hw/core/null-machine.c
+++ b/hw/core/null-machine.c
@@ -13,18 +13,97 @@
 
 #include "qemu/osdep.h"
 #include "qemu-common.h"
+#include "qemu/error-report.h"
 #include "hw/hw.h"
 #include "hw/boards.h"
+#include "hw/loader.h"
+#include "sysemu/sysemu.h"
+#include "exec/address-spaces.h"
+#include "cpu.h"
+#include "elf.h"
+
+#ifdef TARGET_WORDS_BIGENDIAN
+#define LOAD_ELF_ENDIAN_FLAG 1
+#else
+#define LOAD_ELF_ENDIAN_FLAG 0
+#endif
+
+static hwaddr cpu_initial_pc;
+
+static uint64_t translate_phys_addr(void *opaque, uint64_t addr)
+{
+return cpu_get_phys_page_debug(CPU(opaque), addr);
+}
+
+static void machine_none_load_kernel(CPUState *cpu, const char *kernel_fname,
+ ram_addr_t ram_size)
+{
+uint64_t elf_entry;
+int kernel_size;
+
+if (!ram_size) {
+error_report("You need RAM for loading a kernel");
+return;
+}
+
+kernel_size = load_elf(kernel_fname, translate_phys_addr, cpu, &elf_entry,
+   NULL, NULL, LOAD_ELF_ENDIAN_FLAG, EM_NONE, 0, 0);
+cpu_initial_pc = elf_entry;
+if (kernel_size < 0) {
+kernel_size = load_uimage(kernel_fname, &cpu_initial_pc, NULL, NULL,
+  NULL, NULL);
+}
+if (kernel_size < 0) {
+kernel_size = load_image_targphys(kernel_fname, 0, ram_size);
+}
+if (kernel_size < 0) {
+error_report("Could not load kernel '%s'", kernel_fname);
+return;
+}
+}
+
+static void machine_none_cpu_reset(void *opaque)
+{
+CPUState *cpu = CPU(opaque);
+
+cpu_reset(cpu);
+cpu_set_pc(cpu, cpu_initial_pc);
+}
 
 static void machine_none_init(MachineState *machine)
 {
+ram_addr_t ram_size = machine->ram_size;
+MemoryRegion *ram;
+CPUState *cpu = NULL;
+
+/* Initialize CPU (if a model has been specified) */
+if (machine->cpu_model) {
+cpu = cpu_init(machine->cpu_model);
+if (!cpu) {
+error_report("Unable to initialize CPU");
+exit(1);
+}
+qemu_register_reset(machine_none_cpu_reset, cpu);
+cpu_reset(cpu);
+}
+
+/* RAM at address zero */
+if (ram_size) {
+ram = g_new(MemoryRegion, 1);
+memory_region_allocate_system_memory(ram, NULL, "ram", ram_size);
+memory_region_add_subregion(get_system_memory(), 0, ram);
+}
+
+if (machine->kernel_filename) {
+machine_none_load_kernel(cpu, machine->kernel_filename, ram_size);
+}
 }
 
 static void machine_none_machine_init(MachineClass *mc)
 {
 mc->desc = "empty machine";
 mc->init = machine_none_init;
-mc->max_cpus = 0;
+mc->default_ram_size = 0;
 }
 
 DEFINE_MACHINE("none", machine_none_machine_init)
-- 
1.8.3.1

[Qemu-devel] [PATCH] target-openrisc: Fix exception handling status registers

2017-01-13 Thread Stafford Horne

I am working on testing instruction emulation patches for the linux
kernel. During testing I found these 2 issues:

 - sets DSX (delay slot exception) but never clears it
 - EEAR for illegal insns should point to the bad exception (as per
   openrisc spec) but its not

This patch fixes these two issues by clearing the DSX flag when not in a
delay slot and by setting EEAR to exception PC when handling illegal
instruction exceptions.

After this patch the openrisc kernel with latest patches boots great on
qemu and instruction emulation works.

Cc: qemu-triv...@nongnu.org
Cc: openr...@lists.librecores.org
Signed-off-by: Stafford Horne 
---
 target/openrisc/interrupt.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/target/openrisc/interrupt.c b/target/openrisc/interrupt.c
index 5fe3f11..e1b0142 100644
--- a/target/openrisc/interrupt.c
+++ b/target/openrisc/interrupt.c
@@ -38,10 +38,17 @@ void openrisc_cpu_do_interrupt(CPUState *cs)
 env->flags &= ~D_FLAG;
 env->sr |= SR_DSX;
 env->epcr -= 4;
+} else {
+env->sr &= ~SR_DSX;
 }
 if (cs->exception_index == EXCP_SYSCALL) {
 env->epcr += 4;
 }
+/* When we have an illegal instruction the error effective address
+   shall be set to the illegal instruction address.  */
+if (cs->exception_index == EXCP_ILLEGAL) {
+env->eear = env->pc;
+}
 
 /* For machine-state changed between user-mode and supervisor mode,
we need flush TLB when we enter&exit EXCP.  */
-- 
2.9.3

Re: [Qemu-devel] [PATCH v3] monitor: Fix crashes when using HMP commands without CPU

2017-01-13 Thread Markus Armbruster

Thomas Huth  writes:

> On 12.01.2017 17:22, Markus Armbruster wrote:
>> Thomas Huth  writes:
>> 
>>> When running certain HMP commands ("info registers", "info cpustats",
>>> "nmi", "memsave" or dumping virtual memory) with the "none" machine,
>>> QEMU crashes with a segmentation fault. This happens because the "none"
>>> machine does not have any CPUs by default, but these HMP commands did
>>> not check for a valid CPU pointer yet. Add such checks now, so we get
>>> an error message about the missing CPU instead.
>>>
>>> Reviewed-by: Dr. David Alan Gilbert 
>>> Signed-off-by: Thomas Huth 
>>> ---
>>>  v3:
>>>  - Use UNASSIGNED_CPU_INDEX instead of hard-coded -1
>>>  v2:
>>>  - Added more checks to cover "nmi" and "memsave", too
>>>
>>>  hmp.c |  8 +++-
>>>  monitor.c | 37 +++--
>>>  2 files changed, 38 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/hmp.c b/hmp.c
>>> index b869617..b1c503a 100644
>>> --- a/hmp.c
>>> +++ b/hmp.c
>>> @@ -1013,8 +1013,14 @@ void hmp_memsave(Monitor *mon, const QDict *qdict)
>>>  const char *filename = qdict_get_str(qdict, "filename");
>>>  uint64_t addr = qdict_get_int(qdict, "val");
>>>  Error *err = NULL;
>>> +int cpu_index = monitor_get_cpu_index();
>>>  
>>> -qmp_memsave(addr, size, filename, true, monitor_get_cpu_index(), &err);
>>> +if (cpu_index < 0) {
>>> +monitor_printf(mon, "No CPU available\n");
>>> +return;
>>> +}
>>> +
>>> +qmp_memsave(addr, size, filename, true, cpu_index, &err);
>>>  hmp_handle_error(mon, &err);
>>>  }
>>>  
>>> diff --git a/monitor.c b/monitor.c
>>> index 0841d43..17121ff 100644
>>> --- a/monitor.c
>>> +++ b/monitor.c
>>> @@ -1025,6 +1025,9 @@ int monitor_set_cpu(int cpu_index)
>>>  CPUState *mon_get_cpu(void)
>>>  {
>>>  if (!cur_mon->mon_cpu) {
>>> +if (!first_cpu) {
>>> +return NULL;
>>> +}
>>>  monitor_set_cpu(first_cpu->cpu_index);
>>>  }
>>>  cpu_synchronize_state(cur_mon->mon_cpu);
>>> @@ -1033,17 +1036,27 @@ CPUState *mon_get_cpu(void)
>>>  
>>>  CPUArchState *mon_get_cpu_env(void)
>>>  {
>>> -return mon_get_cpu()->env_ptr;
>>> +CPUState *cs = mon_get_cpu();
>>> +
>>> +return cs ? cs->env_ptr : NULL;
>>>  }
>>>  
>>>  int monitor_get_cpu_index(void)
>>>  {
>>> -return mon_get_cpu()->cpu_index;
>>> +CPUState *cs = mon_get_cpu();
>>> +
>>> +return cs ? cs->cpu_index : UNASSIGNED_CPU_INDEX;
>>>  }
>>>  
>>>  static void hmp_info_registers(Monitor *mon, const QDict *qdict)
>>>  {
>>> -cpu_dump_state(mon_get_cpu(), (FILE *)mon, monitor_fprintf, 
>>> CPU_DUMP_FPU);
>>> +CPUState *cs = mon_get_cpu();
>>> +
>>> +if (!cs) {
>>> +monitor_printf(mon, "No CPU available\n");
>>> +return;
>>> +}
>>> +cpu_dump_state(cs, (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU);
>>>  }
>>>  
>>>  static void hmp_info_jit(Monitor *mon, const QDict *qdict)
>>> @@ -1076,7 +1089,13 @@ static void hmp_info_history(Monitor *mon, const 
>>> QDict *qdict)
>>>  
>>>  static void hmp_info_cpustats(Monitor *mon, const QDict *qdict)
>>>  {
>>> -cpu_dump_statistics(mon_get_cpu(), (FILE *)mon, &monitor_fprintf, 0);
>>> +CPUState *cs = mon_get_cpu();
>>> +
>>> +if (!cs) {
>>> +monitor_printf(mon, "No CPU available\n");
>>> +return;
>>> +}
>>> +cpu_dump_statistics(cs, (FILE *)mon, &monitor_fprintf, 0);
>>>  }
>>>  
>>>  static void hmp_info_trace_events(Monitor *mon, const QDict *qdict)
>>> @@ -1235,6 +1254,12 @@ static void memory_dump(Monitor *mon, int count, int 
>>> format, int wsize,
>>>  int l, line_size, i, max_digits, len;
>>>  uint8_t buf[16];
>>>  uint64_t v;
>>> +CPUState *cs = mon_get_cpu();
>>> +
>>> +if (!cs && (format == 'i' || !is_physical)) {
>>> +monitor_printf(mon, "Can not dump without CPU\n");
>>> +return;
>>> +}
>> 
>> This is basically "if (we're going to dereference cs)".  Not so nice, in
>> particular since the dereferences are hidden inside mon_get_cpu_env()
>> calls.  I guess it'll do.
>> 
>>>  
>>>  if (format == 'i') {
>>>  int flags = 0;
>>> @@ -1264,7 +1289,7 @@ static void memory_dump(Monitor *mon, int count, int 
>>> format, int wsize,
>>>  flags = msr_le << 16;
>>>  flags |= env->bfd_mach;
>>>  #endif
>>> -monitor_disas(mon, mon_get_cpu(), addr, count, is_physical, flags);
>>> +monitor_disas(mon, cs, addr, count, is_physical, flags);
>>>  return;
>>>  }
>>>  
>>> @@ -1303,7 +1328,7 @@ static void memory_dump(Monitor *mon, int count, int 
>>> format, int wsize,
>>>  if (is_physical) {
>>>  cpu_physical_memory_read(addr, buf, l);
>>>  } else {
>>> -if (cpu_memory_rw_debug(mon_get_cpu(), addr, buf, l, 0) < 0) {
>>> +if (cpu_memory_rw_debug(cs, addr, buf, l, 0) < 0) {
>>>  monitor_printf(mon, " Cannot access memory\n");
>>>  break;
>>>

Re: [Qemu-devel] [PATCH RFC v3 09/14] memory: introduce memory_region_notify_one()

2017-01-13 Thread Jason Wang




On 2017年01月13日 11:06, Peter Xu wrote:

Generalizing the notify logic in memory_region_notify_iommu() into a
single function. This can be further used in customized replay()
functions for IOMMUs.

Signed-off-by: Peter Xu 
---
  include/exec/memory.h | 15 +++
  memory.c  | 29 ++---
  2 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 2233f99..f367e54 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -669,6 +669,21 @@ void memory_region_notify_iommu(MemoryRegion *mr,
  IOMMUTLBEntry entry);
  
  /**

+ * memory_region_notify_one: notify a change in an IOMMU translation
+ *   entry to a single notifier
+ *
+ * This works just like memory_region_notify_iommu(), but it only
+ * notifies a specific notifier, not all of them.
+ *
+ * @notifier: the notifier to be notified
+ * @entry: the new entry in the IOMMU translation table.  The entry
+ * replaces all old entries for the same virtual I/O address range.
+ * Deleted entries have .@perm == 0.
+ */
+void memory_region_notify_one(IOMMUNotifier *notifier,
+  IOMMUTLBEntry *entry);
+
+/**
   * memory_region_register_iommu_notifier: register a notifier for changes to
   * IOMMU translation entries.
   *
diff --git a/memory.c b/memory.c
index df62bd1..6e4c872 100644
--- a/memory.c
+++ b/memory.c
@@ -1665,26 +1665,33 @@ void 
memory_region_unregister_iommu_notifier(MemoryRegion *mr,
  memory_region_update_iommu_notify_flags(mr);
  }
  
-void memory_region_notify_iommu(MemoryRegion *mr,

-IOMMUTLBEntry entry)
+void memory_region_notify_one(IOMMUNotifier *notifier,
+  IOMMUTLBEntry *entry)
  {
-IOMMUNotifier *iommu_notifier;
  IOMMUNotifierFlag request_flags;
  
-assert(memory_region_is_iommu(mr));

-
-if (entry.perm & IOMMU_RW) {
+if (entry->perm & IOMMU_RW) {
  request_flags = IOMMU_NOTIFIER_MAP;
  } else {
  request_flags = IOMMU_NOTIFIER_UNMAP;
  }


Nit: you can keep this outside the loop.

Thanks

  
+if (notifier->notifier_flags & request_flags &&

+notifier->start <= entry->iova &&
+notifier->end >= entry->iova) {
+notifier->notify(notifier, entry);
+}
+}
+
+void memory_region_notify_iommu(MemoryRegion *mr,
+IOMMUTLBEntry entry)
+{
+IOMMUNotifier *iommu_notifier;
+
+assert(memory_region_is_iommu(mr));
+
  QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
-if (iommu_notifier->notifier_flags & request_flags &&
-iommu_notifier->start <= entry.iova &&
-iommu_notifier->end >= entry.iova) {
-iommu_notifier->notify(iommu_notifier, &entry);
-}
+memory_region_notify_one(iommu_notifier, &entry);
  }
  }

[Qemu-devel] [Bug 1654271] Re: host machine freezes

2017-01-13 Thread Thomas Huth

Moving this to the Ubuntu-qemu bug tracker since you're apparently using
Ubuntu's QEMU, not the upstream QEMU.

** Project changed: qemu => qemu (Ubuntu)

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1654271

Title:
  host machine freezes

Status in qemu package in Ubuntu:
  New

Bug description:
  When trying to install Radeon crimson relive 16.12.1, each host machine 
freezes at the machine environment check stage.
  Even if you launch the installer in an environment where GPU is not 
installed, each host machine freezes in the same way.
  When Gusest's CPU is changed from 4 to 1, the environment check is completed 
normally.
  Even if FMA and AVX 2 are invalidated by CPU setting, environment check will 
be completed normally.
  Since 1 CPU does not freeze, I thought that it would be better to fix the 
CPU, but I will still freeze.
  Is it impossible to enable the function of AVX 2 (FMA?) In the virtual 
machine on KVM?

  HOST
Motherboard ： Asrock Z97 extream6
CPU  : Core i7-4790
Memory   : 24GB
OS   : Ubuntu 16.04(kernel 4.7.2-040702)
qemu : 2.5+dfsg-5ubuntu10.6
libvirt  : 1.3.1-1ubuntu10.5
ovmf : 0~20160408.ffea0a2c-2

  Guest
  　BIOS   : UEFI
OS : Windows10 Pro Build 14986
Memory : 8GB
GPU: Radeon HD7770

  ---
  
WinPC01
4f784d78-4d5e-416a-bb43-82ecd2cad409
8388608
8388608
4

  hvm
  /usr/share/OVMF/OVMF_CODE.fd
  /var/lib/libvirt/qemu/nvram/WinPC01_VARS.fd
  


  
  
  
  
  



  
  

  


  Haswell
  Intel
  


  
  
  
  

destroy
restart
restart

  
  


  /usr/bin/kvm-spice
  





  
  




  
  




  
  

  
  


  
  


  
  


  
  
  

  
  

  
  

  
  

  
  




  
  




  
  


  
  
  
  
  

  
  


  
  

  



  
  

  



  
  

  



  
  

  



  
  

  

  
  ---

  If you put the following designation in the CPU tag, it will not freeze.
  ---
  
  
  ---

To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1654271/+subscriptions

[Qemu-devel] [PATCH] sdl2: fix build failure on windows

2017-01-13 Thread Gerd Hoffmann

Cc: Stefan Weil 
Cc: Samuel Thibault 
Signed-off-by: Gerd Hoffmann 
---
 ui/sdl2.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/ui/sdl2.c b/ui/sdl2.c
index 9a79b17..91fb111 100644
--- a/ui/sdl2.c
+++ b/ui/sdl2.c
@@ -817,9 +817,15 @@ void sdl_display_init(DisplayState *ds, int full_screen, 
int no_frame)
 sdl2_console[i].dcl.con = con;
 register_displaychangelistener(&sdl2_console[i].dcl);
 
+#if defined(SDL_VIDEO_DRIVER_WINDOWS) || defined(SDL_VIDEO_DRIVER_X11)
 if (SDL_GetWindowWMInfo(sdl2_console[i].real_window, &info)) {
+#if defined(SDL_VIDEO_DRIVER_WINDOWS)
+qemu_console_set_window_id(con, (uintptr_t)info.info.win.window);
+#elif defined(SDL_VIDEO_DRIVER_X11)
 qemu_console_set_window_id(con, info.info.x11.window);
+#endif
 }
+#endif
 }
 
 /* Load a 32x32x4 image. White pixels are transparent. */
-- 
1.8.3.1

[Qemu-devel] [Bug 886621] Re: Mac OS X Lion: segmentation fault

2017-01-13 Thread Thomas Huth

Triaging old bug tickets ... can you still reproduce this problem with
the latest version of QEMU (currently version 2.8)?

** Changed in: qemu
   Status: New => Incomplete

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/886621

Title:
  Mac OS X Lion: segmentation fault

Status in QEMU:
  Incomplete

Bug description:
  /usr/local/xeos-build/qemu/bin/qemu -boot order=a -M pc -cpu 486 -vga
  std -smp 1 -m 16 -soundhw sb16 -d
  out_asm,in_asm,op,op_opt,int,exec,cpu,pcall,cpu_reset -fda
  ./build/release/xeos.flp

  Process: qemu [5680]
  Path:/usr/local/xeos-build/qemu/bin/qemu
  Identifier:  qemu
  Version: ??? (???)
  Code Type:   X86-64 (Native)
  Parent Process:  make [5677]

  Date/Time:   2011-11-05 18:53:25.574 +0100
  OS Version:  Mac OS X 10.7.2 (11C74)
  Report Version:  9
  Sleep/Wake UUID: 3C81B8F7-0321-4621-923C-AB655F2CC701

  Interval Since Last Report:  503994 sec
  Crashes Since Last Report:   35
  Per-App Crashes Since Last Report:   9
  Anonymous UUID:  28E7367A-4697-43A4-8D12-005F1917DFD3

  Crashed Thread:  0  Dispatch queue: com.apple.main-thread

  Exception Type:  EXC_BAD_ACCESS (SIGSEGV)
  Exception Codes: KERN_INVALID_ADDRESS at 0x003a

  VM Regions Near 0x3a:
  -->
  __TEXT 000107c75000-000107ebc000 [ 2332K] r-x/rwx 
SM=COW  /usr/local/xeos-build/qemu/bin/qemu

  Application Specific Information:
  objc[5680]: garbage collection is OFF

  Thread 0 Crashed:: Dispatch queue: com.apple.main-thread
  0   qemu  0x000107d9d0ed 0x107c75000 + 1212653
  1   qemu  0x000107dabc39 0x107c75000 + 1272889
  2   ???   0x00010c3b007c 0 + 4500160636

  Thread 1:: Dispatch queue: com.apple.libdispatch-manager
  0   libsystem_kernel.dylib0x7fff85abb7e6 kevent + 10
  1   libdispatch.dylib 0x7fff8e7b15be _dispatch_mgr_invoke 
+ 923
  2   libdispatch.dylib 0x7fff8e7b014e _dispatch_mgr_thread 
+ 54

  Thread 2:
  0   libsystem_kernel.dylib0x7fff85abb192 __workq_kernreturn + 
10
  1   libsystem_c.dylib 0x7fff85886594 _pthread_wqthread + 
758
  2   libsystem_c.dylib 0x7fff85887b85 start_wqthread + 13

  Thread 3:
  0   libsystem_kernel.dylib0x7fff85abb192 __workq_kernreturn + 
10
  1   libsystem_c.dylib 0x7fff85886594 _pthread_wqthread + 
758
  2   libsystem_c.dylib 0x7fff85887b85 start_wqthread + 13

  Thread 4:
  0   libsystem_kernel.dylib0x7fff85abb192 __workq_kernreturn + 
10
  1   libsystem_c.dylib 0x7fff85886594 _pthread_wqthread + 
758
  2   libsystem_c.dylib 0x7fff85887b85 start_wqthread + 13

  Thread 5:
  0   libsystem_kernel.dylib0x7fff85abb036 __sigwait + 10
  1   libsystem_c.dylib 0x7fff8583aaab sigwait + 68
  2   qemu  0x000107d221ef 0x107c75000 + 709103
  3   libsystem_c.dylib 0x7fff858848bf _pthread_start + 335
  4   libsystem_c.dylib 0x7fff85887b75 thread_start + 13

  Thread 0 crashed with X86 Thread State (64-bit):
    rax: 0x5433ade07f7c29e7  rbx: 0x0010  rcx: 0x  
rdx: 0x2000
    rdi: 0x0010  rsi: 0x  rbp: 0x7fff678714a0  
rsp: 0x7fff67871470
     r8: 0x000109fe8000   r9: 0x0fff  r10: 0x7fa7c185c01d  
r11: 0x0246
    r12: 0x0001087ae368  r13: 0x  r14: 0x  
r15: 0x1f80
    rip: 0x000107d9d0ed  rfl: 0x00010202  cr2: 0x003a
  Logical CPU: 6

  Binary Images:
     0x107c75000 -0x107ebbff7 +qemu (??? - ???) 
 /usr/local/xeos-build/qemu/bin/qemu
     0x1087cb000 -0x1088b5fe7 +libglib-2.0.0.dylib (2704.0.0 - 
compatibility 2704.0.0) <5E6151CC-61F8-3335-A6FA-EFDD71474FA6> 
/usr/local/macmade/sw/glib/lib/libglib-2.0.0.dylib
     0x108917000 -0x10891 +libintl.8.dylib (9.1.0 - 
compatibility 9.0.0) <7D75E177-3172-2F78-1E08-1118A3D2D2A9> 
/usr/local/webstart/sw/gettext/lib/libintl.8.dylib
     0x108928000 -0x108949fff +libpng12.0.dylib (23.0.0 - 
compatibility 23.0.0)  
/usr/local/webstart/sw/lib-png/lib/libpng12.0.dylib
     0x10895a000 -0x10897aff7 +libjpeg.62.dylib (63.0.0 - 
compatibility 63.0.0)  
/usr/local/webstart/sw/lib-jpeg/lib/libjpeg.62.dylib
     0x108987000 -0x108a67ff7 +libiconv.2.dylib (8.0.0 - 
compatibility 8.0.0) <54A03BBE-E505-9FF1-79AA-D4D5139BBF9C> 
/usr/local/webstart/sw/lib-iconv/lib/libiconv.2.dylib
  0x7fff67875000 - 0x7fff678a9ac7  dyld (195.5 - ???) 
<4A6E2B28-C7A2-3528-ADB7-

[Qemu-devel] [Bug 917645] Re: [Feature request] ia64-softmmu wanted

2017-01-13 Thread Thomas Huth

** Changed in: qemu
   Importance: Undecided => Wishlist

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/917645

Title:
  [Feature request] ia64-softmmu wanted

Status in HelenOS branches:
  Confirmed
Status in QEMU:
  New

Bug description:
  Qemu is missing support for full system emulation of the Itanium
  architecture, which is one of the main non-x86 server architectures
  today (despite the alleged decline in popularity). It would be really
  nice if someone had interest in adding full ia64 support to Qemu. Many
  OS projects could then use Qemu as the universal machine emulator for
  development and testing.

  Note that there is an open source Ski simulator which can emulate an
  ia64 CPU, memory and a couple of Ski-specific devices, but the project
  seems inactive and the simulated machine is too simplified (no real
  devices, no real firmware). Moreover, it'd be better to have one tool
  such as Qemu for all architectures of interest rather than one per
  each architecture.

To manage notifications about this bug go to:
https://bugs.launchpad.net/helenos/+bug/917645/+subscriptions

[Qemu-devel] [Bug 894037] Re: With VNC, "-usbdevice tablet" no longer makes mouse pointers line up

2017-01-13 Thread Thomas Huth

According to comment #6 this has been fixed in version 1.0 ... is there
still something left to do here?

** Changed in: qemu
   Status: New => Incomplete

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/894037

Title:
  With VNC, "-usbdevice tablet" no longer makes mouse pointers line up

Status in QEMU:
  Incomplete

Bug description:
  I use qemu in VNC mode.  In order to get the client and server mouse
  pointers to line up, I've had to use the "-usbdevice tablet" option.
  This no longer works, and it behaves the same as if the option is not
  there.  This makes my VMs unusable to me.

  Here's how I'm booting WinXP:

  qemu-system-x86_64 -vga std -drive
  cache=writeback,index=0,media=disk,file=winxp.img -k en-us -m 2048
  -smp 2 -vnc :3101 -usbdevice tablet -boot c -enable-kvm &

  The Windows install hasn't changed, only qemu.

  I'm running this version of QEMU:

  QEMU emulator version 0.15.0 (qemu-kvm-0.15.0), Copyright (c)
  2003-2008 Fabrice Bellard

  I'll see about upgrading to 0.15.1, but since I haven't seen other
  reports of this particular problem in your DB, I'm assuming that this
  problem has not been fixed between 0.15.0 and 0.15.1.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/894037/+subscriptions

Re: [Qemu-devel] [PATCH v8 10/10] megasas: remove unnecessary megasas_use_msix()

2017-01-13 Thread Hannes Reinecke

On 01/13/2017 08:12 AM, Cao jin wrote:
> Also move certain hunk above, to place msix init related code together.
> 
> CC: Hannes Reinecke 
> CC: Paolo Bonzini 
> CC: Markus Armbruster 
> CC: Marcel Apfelbaum 
> CC: Michael S. Tsirkin 
> 
> Signed-off-by: Cao jin 
> ---
>  hw/scsi/megasas.c | 19 ++-
>  1 file changed, 6 insertions(+), 13 deletions(-)
> 
> msix_init() doesn't set the MSI-X enable bit, so use msix_enabled()
> is not right here, restore the old check without the
> megasas_use_msix() wrapper.
> 
Reviewed-by: Hannes Reinecke 

Cheers,

Hannes
-- 
Dr. Hannes ReineckeTeamlead Storage & Networking
h...@suse.de   +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

Re: [Qemu-devel] [PATCH v8 00/10] Convert msix_init() to error

2017-01-13 Thread Markus Armbruster

Cao jin  writes:

> Only a tiny modification in patch "megasas: remove unnecessary
> megasas_use_msix()" to fix a megasas issue.

Please have a look at Michael's review in
Message-ID: <20170112163519-mutt-send-email-...@kernel.org>

> v8 changelog:
> 1. reorder: place the "megasas: remove unnecessary megasas_use_msix()"
>as the last one. and fix the bug in it, detailed description in it,
>also removed the R-b of it.
> 2. Add the Acked-by from Marcel for first 9 patches; add the R-b from Markus
>to "hcd-xhci: check & correct param before using it".
>
> Test:
> 1. make check ok
> 2. command line test for all affected device, make sure their realization
>is ok.
> 3. detailed test for megasas, hcd-xhci, vmxnet3.
>megasas: install a linux distro is ok
>./qemu-system-x86_64 --enable-kvm -m 1024
>-device megasas,id=scsi0,bus=pci.0
>-drive file=/xx/scsi-disk.img,if=none,id=drive-scsi0
>-device 
> scsi-disk,bus=scsi0.0,channel=0,scsi-id=4,lun=0,drive=drive-scsi0,id=scsi0-4
>-cdrom /xx/Fedora-Server-DVD-x86_64-23.iso --monitor stdio
>
>hcd-xhci: partition the usbstick.img, mkfs, write to file, is ok
>./qemu-system-x86_64 -M q35 -m 1024 --enable-kvm
>-drive if=none,id=usbstick,file=/xx/usbstick.img
>-device nec-usb-xhci,id=usb,p2=8,p3=8,bus=pcie.0
>-device usb-storage,bus=usb.0,drive=usbstick --monitor stdio
>/xx/FedoraWorkStatsion23-x86_64.img
>
>vmxnet3: ping another destination belongs to host's network is ok.
>But no migration test, because I don't have a spare machine for now.
>./qemu-system-x86_64 -M q35 -m 1024 --enable-kvm
>-netdev tap,id=mynet0 -device vmxnet3,netdev=mynet0
>--monitor stdio /home/pino/vm/FedoraWorkStatsion23-x86_64.img

You could migrate to a new QEMU on the same machine.

Or, if you don't want to deal with two instances of QEMU running at the
same time, migrate to file:

(qemu) migrate "exec:cat >zzz"

Start the target with -incoming "exec:cat zzz".  Better than nothing.

[Qemu-devel] [Bug 893068] Re: Spanish keys { and [ did not work

2017-01-13 Thread Thomas Huth

Triaging old bug tickets ... can you still reproduce this problem with
the latest version of QEMU (currently version 2.8)?

** Changed in: qemu
   Status: New => Incomplete

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/893068

Title:
  Spanish keys { and [ did not work

Status in QEMU:
  Incomplete

Bug description:
  The keys { and [ did not work inside the virtualized enviorment
  (widnows 7).  The problems happens ussing aqemu as a front end as well
  as invoking qemu-kvm from command line:

  qemu-kvm -m 8096 eclipse.img -smp cores=4,threads=2 -hdb ander.img -k
  es

  We have also notices this warning in the console:

  Warning: no scancode found for keysym 314

  The host system is gentoo stable with some exceptions (mainly qemu-
  kvm-0.15.1-r1, gcc-4.6.2 and kernel-3.2_rc2)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/893068/+subscriptions

Re: [Qemu-devel] [PATCH v8 10/10] megasas: remove unnecessary megasas_use_msix()

2017-01-13 Thread Markus Armbruster

Cao jin  writes:

> Also move certain hunk above, to place msix init related code together.
>
> CC: Hannes Reinecke 
> CC: Paolo Bonzini 
> CC: Markus Armbruster 
> CC: Marcel Apfelbaum 
> CC: Michael S. Tsirkin 
>
> Signed-off-by: Cao jin 
> ---
>  hw/scsi/megasas.c | 19 ++-
>  1 file changed, 6 insertions(+), 13 deletions(-)
>
> msix_init() doesn't set the MSI-X enable bit, so use msix_enabled()
> is not right here, restore the old check without the
> megasas_use_msix() wrapper.

Reviewed-by: Markus Armbruster

Re: [Qemu-devel] [PATCH v8 00/10] Convert msix_init() to error

2017-01-13 Thread Cao jin



On 01/13/2017 04:22 PM, Markus Armbruster wrote:
> Cao jin  writes:
> 
>> Only a tiny modification in patch "megasas: remove unnecessary
>> megasas_use_msix()" to fix a megasas issue.
> 
> Please have a look at Michael's review in
> Message-ID: <20170112163519-mutt-send-email-...@kernel.org>
> 

Oh...thanks for reminding, I missed that mail...

>> v8 changelog:
>> 1. reorder: place the "megasas: remove unnecessary megasas_use_msix()"
>>as the last one. and fix the bug in it, detailed description in it,
>>also removed the R-b of it.
>> 2. Add the Acked-by from Marcel for first 9 patches; add the R-b from Markus
>>to "hcd-xhci: check & correct param before using it".
>>
>> Test:
>> 1. make check ok
>> 2. command line test for all affected device, make sure their realization
>>is ok.
>> 3. detailed test for megasas, hcd-xhci, vmxnet3.
>>megasas: install a linux distro is ok
>>./qemu-system-x86_64 --enable-kvm -m 1024
>>-device megasas,id=scsi0,bus=pci.0
>>-drive file=/xx/scsi-disk.img,if=none,id=drive-scsi0
>>-device 
>> scsi-disk,bus=scsi0.0,channel=0,scsi-id=4,lun=0,drive=drive-scsi0,id=scsi0-4
>>-cdrom /xx/Fedora-Server-DVD-x86_64-23.iso --monitor stdio
>>
>>hcd-xhci: partition the usbstick.img, mkfs, write to file, is ok
>>./qemu-system-x86_64 -M q35 -m 1024 --enable-kvm
>>-drive if=none,id=usbstick,file=/xx/usbstick.img
>>-device nec-usb-xhci,id=usb,p2=8,p3=8,bus=pcie.0
>>-device usb-storage,bus=usb.0,drive=usbstick --monitor stdio
>>/xx/FedoraWorkStatsion23-x86_64.img
>>
>>vmxnet3: ping another destination belongs to host's network is ok.
>>But no migration test, because I don't have a spare machine for now.
>>./qemu-system-x86_64 -M q35 -m 1024 --enable-kvm
>>-netdev tap,id=mynet0 -device vmxnet3,netdev=mynet0
>>--monitor stdio /home/pino/vm/FedoraWorkStatsion23-x86_64.img
> 
> You could migrate to a new QEMU on the same machine.
> 
> Or, if you don't want to deal with two instances of QEMU running at the
> same time, migrate to file:
> 
> (qemu) migrate "exec:cat >zzz"
> 
> Start the target with -incoming "exec:cat zzz".  Better than nothing.
> 

Thanks for the info, I will try

-- 
Sincerely,
Cao jin

Re: [Qemu-devel] [PATCH] sdl2: fix build failure on windows

2017-01-13 Thread Stefan Weil

Am 13.01.2017 um 09:14 schrieb Gerd Hoffmann:
> Cc: Stefan Weil 
> Cc: Samuel Thibault 
> Signed-off-by: Gerd Hoffmann 
> ---
>  ui/sdl2.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/ui/sdl2.c b/ui/sdl2.c
> index 9a79b17..91fb111 100644
> --- a/ui/sdl2.c
> +++ b/ui/sdl2.c
> @@ -817,9 +817,15 @@ void sdl_display_init(DisplayState *ds, int full_screen, 
> int no_frame)
>  sdl2_console[i].dcl.con = con;
>  register_displaychangelistener(&sdl2_console[i].dcl);
>  
> +#if defined(SDL_VIDEO_DRIVER_WINDOWS) || defined(SDL_VIDEO_DRIVER_X11)
>  if (SDL_GetWindowWMInfo(sdl2_console[i].real_window, &info)) {
> +#if defined(SDL_VIDEO_DRIVER_WINDOWS)
> +qemu_console_set_window_id(con, (uintptr_t)info.info.win.window);
> +#elif defined(SDL_VIDEO_DRIVER_X11)
>  qemu_console_set_window_id(con, info.info.x11.window);
> +#endif
>  }
> +#endif
>  }
>  
>  /* Load a 32x32x4 image. White pixels are transparent. */


Thanks.

Reviewed-by: Stefan Weil

Re: [Qemu-devel] [PATCH V2 0/3] hw/pcie: Introduce Generic PCI Express Root Port

2017-01-13 Thread Gerd Hoffmann

  Hi,

> Sure, thanks for the pointers!
> I suppose I can change later to msi-x in a follow up patch
> to avoid introducing new bugs from the beginning :)

I'd use msi-x right from start to avoid any backward compatibility
issues.

cheers,
  Gerd

[Qemu-devel] [Bug 1656234] [NEW] Qemu core dumped if using virtio-net

2017-01-13 Thread Robert Hu

Public bug reported:

System Environment
===
Qemu commit/branch: e92fbc75
Host OS: RHEL7.2 ia32e
Host Kernel: 4.9.0
Guest OS: RHEL7.2 ia32e
Guest Kernel: 4.9.0

Bug detailed description
===
While create a kvm guest using virtio-net, the qemu will core dump with showing 
"Aborted (core dumped)".
Reproduce Steps
==
create a guest:
qemu-system-x86_64 -enable-kvm -m 4096 -smp 4 -device 
virtio-net-pci,netdev=nic0,mac=00:16:3e:49:be:24 -netdev 
tap,id=nic0,script=/etc/kvm/qemu-ifup -drive 
file=/ia32e_rhel7u2_kvm.qcow2,if=none,id=virtio-disk0 -device 
virtio-blk-pci,drive=virtio-disk0 -serial file:serial.log

Current Result:
==

qemu-system-x86_64: 
/workspace/ia32e/nightly/kvm-next-20170105-ef85b6-e92fbc/kvm/qemu/hw/virtio/virtio.c:214:
 virtio_queue_set_notification: Assertion `vq->notification_disabled > 0' 
failed.
Aborted (core dumped)

add info

[root@hsw-ep2 Desktop]# dmesg |grep -v virbr0 |tail -n 10
[ 1760.265000] device tap0 left promiscuous mode
[ 1879.148642] device tap0 entered promiscuous mode
[ 1885.213702] kvm [14186]: vcpu0, guest rIP: 0x81066784 disabled 
perfctr wrmsr: 0xc2 data 0x
[ 1912.258783] device tap0 left promiscuous mode
[ 1995.972267] device tap0 entered promiscuous mode
[ 2001.990207] kvm [14335]: vcpu0, guest rIP: 0x81066784 disabled 
perfctr wrmsr: 0xc2 data 0x
[ 2024.703072] device tap0 left promiscuous mode
[ 2145.374239] device tap0 entered promiscuous mode
[ 2151.409948] kvm [14541]: vcpu0, guest rIP: 0x81066784 disabled 
perfctr wrmsr: 0xc2 data 0x
[ 2178.281446] device tap0 left promiscuous mode

** Affects: qemu
 Importance: Undecided
 Status: New

** Attachment added: "guest serial log"
   https://bugs.launchpad.net/bugs/1656234/+attachment/4803773/+files/serial.log

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1656234

Title:
  Qemu core dumped if using virtio-net

Status in QEMU:
  New

Bug description:
  System Environment
  ===
  Qemu commit/branch: e92fbc75
  Host OS: RHEL7.2 ia32e
  Host Kernel: 4.9.0
  Guest OS: RHEL7.2 ia32e
  Guest Kernel: 4.9.0

  Bug detailed description
  ===
  While create a kvm guest using virtio-net, the qemu will core dump with 
showing "Aborted (core dumped)".
  Reproduce Steps
  ==
  create a guest:
  qemu-system-x86_64 -enable-kvm -m 4096 -smp 4 -device 
virtio-net-pci,netdev=nic0,mac=00:16:3e:49:be:24 -netdev 
tap,id=nic0,script=/etc/kvm/qemu-ifup -drive 
file=/ia32e_rhel7u2_kvm.qcow2,if=none,id=virtio-disk0 -device 
virtio-blk-pci,drive=virtio-disk0 -serial file:serial.log

  Current Result:
  ==

  qemu-system-x86_64: 
/workspace/ia32e/nightly/kvm-next-20170105-ef85b6-e92fbc/kvm/qemu/hw/virtio/virtio.c:214:
 virtio_queue_set_notification: Assertion `vq->notification_disabled > 0' 
failed.
  Aborted (core dumped)

  add info
  
  [root@hsw-ep2 Desktop]# dmesg |grep -v virbr0 |tail -n 10
  [ 1760.265000] device tap0 left promiscuous mode
  [ 1879.148642] device tap0 entered promiscuous mode
  [ 1885.213702] kvm [14186]: vcpu0, guest rIP: 0x81066784 disabled 
perfctr wrmsr: 0xc2 data 0x
  [ 1912.258783] device tap0 left promiscuous mode
  [ 1995.972267] device tap0 entered promiscuous mode
  [ 2001.990207] kvm [14335]: vcpu0, guest rIP: 0x81066784 disabled 
perfctr wrmsr: 0xc2 data 0x
  [ 2024.703072] device tap0 left promiscuous mode
  [ 2145.374239] device tap0 entered promiscuous mode
  [ 2151.409948] kvm [14541]: vcpu0, guest rIP: 0x81066784 disabled 
perfctr wrmsr: 0xc2 data 0x
  [ 2178.281446] device tap0 left promiscuous mode

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1656234/+subscriptions

Re: [Qemu-devel] [PATCH] nvdimm: allow read/write zero-size namespace label

2017-01-13 Thread Xiao Guangrong




On 01/13/2017 11:02 AM, Li Qiang wrote:

From: Li Qiang 

The spec doesn't say the namespace label can't be zero
when read/write it. As this is no harmful, just allow
it.



WHY?

The spec said that the label should be at least 128K.

Re: [Qemu-devel] [PATCH RFC v3 04/14] intel_iommu: fix trace for inv desc handling

2017-01-13 Thread Peter Xu

On Fri, Jan 13, 2017 at 03:46:31PM +0800, Jason Wang wrote:
> 
> 
> On 2017年01月13日 11:06, Peter Xu wrote:
> >VT-d codes are still using static DEBUG_INTEL_IOMMU macro. That's not
> >good, and we should end the day when we need to recompile the code
> >before getting useful debugging information for vt-d. Time to switch to
> >the trace system.
> >
> >This is the first patch to do it.
> >
> >Generally, the rule of mine is:
> >
> >- for the old GENERAL typed message, I use error_report() directly if
> >   apply. Those are something shouldn't happen, and we should print those
> >   errors in all cases, even without enabling debug and tracing.
> 
> Looks like some were guest trigger-able. If yes, let's try not use
> error_report() for not being flooded.

Yes, it's intended. Most of the error_report()s in this patch can be
triggered by guest, but only by illegal guest behaviors (e.g.,
non-zero reserved fields, or illegal descriptors, etc.). In that
sense, shall we keep them even guest can trigger them? Since people
will never see them if they are running generic and good kernels. More
importantly, these error_report()s can be good hints when guest
encounters issues, for better debugging and triaging.

Actually we have such usage in existing QEMU as well. For example,
when we maintain the DMA mapping in vfio-pci, it's possible that the
shadow page table is mapped illegally due to some reason (that depends
on the guest as well, may not be guest kernel, but DPDK applications
inside guest), and the map() can fail. Here we have:

ret = vfio_dma_map(container, iova,
iotlb->addr_mask + 1, vaddr,
!(iotlb->perm & IOMMU_WO) || mr->readonly);
if (ret) {
error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%m)",
container, iova,
iotlb->addr_mask + 1, vaddr, ret);
}

Which I think is playing the same role here - we will never see these
lines if the guest is normal, and these lines will be useful when bad
things happen.

So I would slightly prefer that we keep these error_reports() for now,
as long as they won't flush the screen for most of the users. (during
the time I played with this series, none of them jumped out :)

Thanks,

-- peterx

Re: [Qemu-devel] [PULL 10/65] tcg/s390: Expose host facilities to tcg-target.h

2017-01-13 Thread Christian Borntraeger

On 01/11/2017 03:17 AM, Richard Henderson wrote:
> This lets us expose facilities to TCG_TARGET_HAS_* defines
> directly, rather than hiding behind function calls.
> 
> Signed-off-by: Richard Henderson 
> ---
>  tcg/s390/tcg-target.h | 126 
> --
>  tcg/s390/tcg-target.inc.c |  74 +++
>  2 files changed, 96 insertions(+), 104 deletions(-)

This broke compilation.Seems that you forgot one replacement:

In file included from /home/cborntra/REPOS/qemu/tcg/tcg.c:253:0:
/home/cborntra/REPOS/qemu/tcg/s390/tcg-target.inc.c: In function ‘tgen_cmp’:
/home/cborntra/REPOS/qemu/tcg/s390/tcg-target.inc.c:1096:19: error: 
‘facilities’ undeclared (first use in this function)
 if (!(facilities & FACILITY_EXT_IMM)) {
   ^~
/home/cborntra/REPOS/qemu/tcg/s390/tcg-target.inc.c:1096:19: note: each 
undeclared identifier is reported only once for each function it appears in
/home/cborntra/REPOS/qemu/rules.mak:64: recipe for target 'tcg/tcg.o' failed
make[1]: *** [tcg/tcg.o] Error 1
Makefile:203: recipe for target 'subdir-s390x-softmmu' failed
make: *** [subdir-s390x-softmmu] Error 2
make: Leaving directory '/home/cborntra/REPOS/qemu/build'

[Qemu-devel] [PATCH v2 0/2] POWER9 TCG enablements - part12

2017-01-13 Thread Nikunj A Dadhania

This series contains 5 new instructions for POWER9 ISA3.0
VSX Scalar Test Data Class
VSX Vector Test Data Class

Changelog:
v1:
* Zero the match variable in the element loops

v0:
* Concise logic for identifying data class in Scalar/Vector 
  test data class instructions

Nikunj A Dadhania (2):
  target-ppc: Add xvtstdc[sp,dp] instructions
  target-ppc: Add xststdc[sp, dp, qp] instructions

 target/ppc/fpu_helper.c | 90 +
 target/ppc/helper.h |  5 +++
 target/ppc/internal.h   |  6 ++-
 target/ppc/translate/vsx-impl.inc.c |  5 +++
 target/ppc/translate/vsx-ops.inc.c  | 12 +
 5 files changed, 116 insertions(+), 2 deletions(-)

-- 
2.7.4

Re: [Qemu-devel] [PATCH RFC v3 07/14] memory: add section range info for IOMMU notifier

2017-01-13 Thread Peter Xu

On Fri, Jan 13, 2017 at 03:55:22PM +0800, Jason Wang wrote:
> 
> 
> On 2017年01月13日 11:06, Peter Xu wrote:
> >In this patch, IOMMUNotifier.{start|end} are introduced to store section
> >information for a specific notifier. When notification occurs, we not
> >only check the notification type (MAP|UNMAP), but also check whether the
> >notified iova is in the range of specific IOMMU notifier, and skip those
> >notifiers if not in the listened range.
> >
> >When removing an region, we need to make sure we removed the correct
> >VFIOGuestIOMMU by checking the IOMMUNotifier.start address as well.
> >
> >Suggested-by: David Gibson 
> >Reviewed-by: David Gibson 
> >Acked-by: Paolo Bonzini 
> >Signed-off-by: Peter Xu 
> >---
> >  hw/vfio/common.c  | 7 ++-
> >  include/exec/memory.h | 3 +++
> >  memory.c  | 4 +++-
> >  3 files changed, 12 insertions(+), 2 deletions(-)
> >
> >diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> >index 801578b..6f648da 100644
> >--- a/hw/vfio/common.c
> >+++ b/hw/vfio/common.c
> >@@ -455,6 +455,10 @@ static void vfio_listener_region_add(MemoryListener 
> >*listener,
> >  giommu->container = container;
> >  giommu->n.notify = vfio_iommu_map_notify;
> >  giommu->n.notifier_flags = IOMMU_NOTIFIER_ALL;
> >+giommu->n.start = section->offset_within_region;
> >+llend = int128_add(int128_make64(giommu->n.start), section->size);
> >+llend = int128_sub(llend, int128_one());
> >+giommu->n.end = int128_get64(llend);
> >  QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
> >  memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
> >@@ -525,7 +529,8 @@ static void vfio_listener_region_del(MemoryListener 
> >*listener,
> >  VFIOGuestIOMMU *giommu;
> >  QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
> >-if (giommu->iommu == section->mr) {
> >+if (giommu->iommu == section->mr &&
> >+giommu->n.start == section->offset_within_region) {
> >  memory_region_unregister_iommu_notifier(giommu->iommu,
> >  &giommu->n);
> >  QLIST_REMOVE(giommu, giommu_next);
> >diff --git a/include/exec/memory.h b/include/exec/memory.h
> >index bec9756..7649e74 100644
> >--- a/include/exec/memory.h
> >+++ b/include/exec/memory.h
> >@@ -84,6 +84,9 @@ typedef enum {
> >  struct IOMMUNotifier {
> >  void (*notify)(struct IOMMUNotifier *notifier, IOMMUTLBEntry *data);
> >  IOMMUNotifierFlag notifier_flags;
> >+/* Notify for address space range start <= addr <= end */
> >+hwaddr start;
> >+hwaddr end;
> >  QLIST_ENTRY(IOMMUNotifier) node;
> >  };
> >  typedef struct IOMMUNotifier IOMMUNotifier;
> >diff --git a/memory.c b/memory.c
> >index 2bfc37f..e88bb54 100644
> >--- a/memory.c
> >+++ b/memory.c
> >@@ -1671,7 +1671,9 @@ void memory_region_notify_iommu(MemoryRegion *mr,
> >  }
> >  QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
> >-if (iommu_notifier->notifier_flags & request_flags) {
> >+if (iommu_notifier->notifier_flags & request_flags &&
> >+iommu_notifier->start <= entry.iova &&
> >+iommu_notifier->end >= entry.iova) {
> >  iommu_notifier->notify(iommu_notifier, &entry);
> >  }
> >  }
> 
> This seems breaks vhost device IOTLB. How about keep the the behavior
> somehow?

Thanks to point out. How about I squash this into this patch?

8<
diff --git a/memory.c b/memory.c
index e88bb54..6de02dd 100644
--- a/memory.c
+++ b/memory.c
@@ -1608,8 +1608,14 @@ void memory_region_register_iommu_notifier(MemoryRegion 
*mr,
 return;
 }
 
+if (n->start == 0 && n->end == 0) {
+/* If these are not specified, we listen to the whole range */
+n->end = (hwaddr)(-1);
+}
+
 /* We need to register for at least one bitfield */
 assert(n->notifier_flags != IOMMU_NOTIFIER_NONE);
+assert(n->start <= n->end);
 QLIST_INSERT_HEAD(&mr->iommu_notify, n, node);
 memory_region_update_iommu_notify_flags(mr);
 }
>8

-- peterx

Re: [Qemu-devel] [virtio-dev] Re: [PATCH v6 kernel 2/5] virtio-balloon: define new feature bit and head struct

2017-01-13 Thread Li, Liang Z

> On Wed, Dec 21, 2016 at 02:52:25PM +0800, Liang Li wrote:
> > Add a new feature which supports sending the page information with
> > range array. The current implementation uses PFNs array, which is not
> > very efficient. Using ranges can improve the performance of
> > inflating/deflating significantly.
> >
> > Signed-off-by: Liang Li 
> > Cc: Michael S. Tsirkin 
> > Cc: Paolo Bonzini 
> > Cc: Cornelia Huck 
> > Cc: Amit Shah 
> > Cc: Dave Hansen 
> > Cc: Andrea Arcangeli 
> > Cc: David Hildenbrand 
> > ---
> >  include/uapi/linux/virtio_balloon.h | 12 
> >  1 file changed, 12 insertions(+)
> >
> > diff --git a/include/uapi/linux/virtio_balloon.h
> > b/include/uapi/linux/virtio_balloon.h
> > index 343d7dd..2f850bf 100644
> > --- a/include/uapi/linux/virtio_balloon.h
> > +++ b/include/uapi/linux/virtio_balloon.h
> > @@ -34,10 +34,14 @@
> >  #define VIRTIO_BALLOON_F_MUST_TELL_HOST0 /* Tell before
> reclaiming pages */
> >  #define VIRTIO_BALLOON_F_STATS_VQ  1 /* Memory Stats virtqueue
> */
> >  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM2 /* Deflate balloon
> on OOM */
> > +#define VIRTIO_BALLOON_F_PAGE_RANGE3 /* Send page info
> with ranges */
> >
> >  /* Size of a PFN in the balloon interface. */  #define
> > VIRTIO_BALLOON_PFN_SHIFT 12
> >
> > +/* Bits width for the length of the pfn range */
> 
> What does this mean? Couldn't figure it out.
> 
> > +#define VIRTIO_BALLOON_NR_PFN_BITS 12
> > +
> >  struct virtio_balloon_config {
> > /* Number of pages host wants Guest to give up. */
> > __u32 num_pages;
> > @@ -82,4 +86,12 @@ struct virtio_balloon_stat {
> > __virtio64 val;
> >  } __attribute__((packed));
> >
> > +/* Response header structure */
> > +struct virtio_balloon_resp_hdr {
> > +   __le64 cmd : 8; /* Distinguish different requests type */
> > +   __le64 flag: 8; /* Mark status for a specific request type */
> > +   __le64 id : 16; /* Distinguish requests of a specific type */
> > +   __le64 data_len: 32; /* Length of the following data, in bytes */
> 
> This use of __le64 makes no sense.  Just use u8/le16/le32 pls.
> 

Got it, will change in the next version. 

And could help take a look at other parts? as well as the QEMU part.

Thanks!
Liang

[Qemu-devel] [PATCH v2 2/2] target-ppc: Add xststdc[sp, dp, qp] instructions

2017-01-13 Thread Nikunj A Dadhania

xststdcsp: VSX Scalar Test Data Class Single-Precision
xststdcdp: VSX Scalar Test Data Class Double-Precision
xststdcqp: VSX Scalar Test Data Class Quad-Precision

Signed-off-by: Nikunj A Dadhania 
---
 target/ppc/fpu_helper.c | 66 -
 target/ppc/helper.h |  3 ++
 target/ppc/internal.h   |  1 +
 target/ppc/translate/vsx-impl.inc.c |  3 ++
 target/ppc/translate/vsx-ops.inc.c  |  4 +++
 5 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 45bc93c..9f5cafd 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -3196,17 +3196,22 @@ void helper_xvxsigsp(CPUPPCState *env, uint32_t opcode)
  *   fld   - vsr_t field (VsrD(*) or VsrW(*))
  *   tfld   - target vsr_t field (VsrD(*) or VsrW(*))
  *   fld_max - target field max
+ *   scrf - set result in CR and FPCC
  */
-#define VSX_TEST_DC(op, nels, xbn, tp, fld, tfld, fld_max)  \
+#define VSX_TEST_DC(op, nels, xbn, tp, fld, tfld, fld_max, scrf)  \
 void helper_##op(CPUPPCState *env, uint32_t opcode) \
 {   \
 ppc_vsr_t xt, xb;   \
 uint32_t i, sign, dcmx; \
-uint32_t match = 0; \
+uint32_t cc, match = 0; \
 \
 getVSR(xbn, &xb, env);  \
-memset(&xt, 0, sizeof(xt)); \
-dcmx = DCMX_XV(opcode); \
+if (!scrf) {\
+memset(&xt, 0, sizeof(xt)); \
+dcmx = DCMX_XV(opcode); \
+} else {\
+dcmx = DCMX(opcode);\
+}   \
 \
 for (i = 0; i < nels; i++) {\
 sign = tp##_is_neg(xb.fld); \
@@ -3219,11 +3224,56 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) 
\
 } else if (tp##_is_zero_or_denormal(xb.fld)) {  \
 match = extract32(dcmx, 0 + !sign, 1);  \
 }   \
-xt.tfld = match ? fld_max : 0;  \
+\
+if (scrf) { \
+cc = sign << CRF_LT_BIT | match << CRF_EQ_BIT;  \
+env->fpscr &= ~(0x0F << FPSCR_FPRF);\
+env->fpscr |= cc << FPSCR_FPRF; \
+env->crf[BF(opcode)] = cc;  \
+} else {\
+xt.tfld = match ? fld_max : 0;  \
+}   \
 match = 0;  \
 }   \
-putVSR(xT(opcode), &xt, env);   \
+if (!scrf) {\
+putVSR(xT(opcode), &xt, env);   \
+}   \
 }
 
-VSX_TEST_DC(xvtstdcdp, 2, xB(opcode), float64, VsrD(i), VsrD(i), UINT64_MAX)
-VSX_TEST_DC(xvtstdcsp, 4, xB(opcode), float32, VsrW(i), VsrW(i), UINT32_MAX)
+VSX_TEST_DC(xvtstdcdp, 2, xB(opcode), float64, VsrD(i), VsrD(i), UINT64_MAX, 0)
+VSX_TEST_DC(xvtstdcsp, 4, xB(opcode), float32, VsrW(i), VsrW(i), UINT32_MAX, 0)
+VSX_TEST_DC(xststdcdp, 1, xB(opcode), float64, VsrD(0), VsrD(0), 0, 1)
+VSX_TEST_DC(xststdcqp, 1, (rB(opcode) + 32), float128, f128, VsrD(0), 0, 1)
+
+void helper_xststdcsp(CPUPPCState *env, uint32_t opcode)
+{
+ppc_vsr_t xb;
+uint32_t dcmx, sign, exp;
+uint32_t cc, match = 0, not_sp = 0;
+
+getVSR(xB(opcode), &xb, env);
+dcmx = DCMX(opcode);
+exp = (xb.VsrD(0) >> 52) & 0x7FF;
+
+sign = float64_is_neg(xb.VsrD(0));
+if (float64_is_any_nan(xb.VsrD(0))) {
+match = extract32(dcmx, 6, 1);
+} else if (float64_is_infinity(xb.VsrD(0))) {
+match = extract32(dcmx, 4 + !sign, 1);
+} else if (float64_is_zero(xb.VsrD(0))) {
+match = extract32(dcmx, 2 + !sign, 1);
+} else if (float64_is_zero_or_denormal(xb.VsrD(0)) ||
+   (exp > 0 && exp < 0x381)) {
+match = extract32(dcmx, 0 + !sign, 1);
+}
+
+not_sp = !float64_eq(xb.VsrD(0),
+ float32_to_float64(
+ float64_to_float32(xb.VsrD(0), &env->fp_status),
+ &env->fp_status), &env->fp_status);
+
+cc = sign << CRF_

Re: [Qemu-devel] [PATCH RFC v3 11/14] intel_iommu: provide its own replay() callback

2017-01-13 Thread Jason Wang




On 2017年01月13日 11:06, Peter Xu wrote:

The default replay() don't work for VT-d since vt-d will have a huge
default memory region which covers address range 0-(2^64-1). This will
normally bring a dead loop when guest starts.


I think it just takes too much time instead of dead loop?



The solution is simple - we don't walk over all the regions. Instead, we
jump over the regions when we found that the page directories are empty.
It'll greatly reduce the time to walk the whole region.


Yes, the problem is memory_region_is_iommu_reply() not smart because:

- It doesn't understand large page
- try go over all possible iova

So I'm thinking to introduce something like iommu_ops->iova_iterate() which

1) accept an start iova and return the next exist map
2) understand large page
3) skip unmapped iova



To achieve this, we provided a page walk helper to do that, invoking
corresponding hook function when we found an page we are interested in.
vtd_page_walk_level() is the core logic for the page walking. It's
interface is designed to suite further use case, e.g., to invalidate a
range of addresses.

Signed-off-by: Peter Xu


For intel iommu, since we intercept all map and unmap, a more tricky 
ieda is to we can record the mappings internally in something like a 
rbtree which could be iterated during replay. This saves possible guest 
io page table traversal, but drawback is it may not survive from OOM 
attacker.


Thanks

[Qemu-devel] [PATCH v2 1/2] target-ppc: Add xvtstdc[sp, dp] instructions

2017-01-13 Thread Nikunj A Dadhania

xvtstdcsp: VSX Vector Test Data Class Single-Precision
xvtstdcdp: VSX Vector Test Data Class Double-Precision

Signed-off-by: Nikunj A Dadhania 
---
 target/ppc/fpu_helper.c | 40 +
 target/ppc/helper.h |  2 ++
 target/ppc/internal.h   |  5 +++--
 target/ppc/translate/vsx-impl.inc.c |  2 ++
 target/ppc/translate/vsx-ops.inc.c  |  8 
 5 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index ffcf9ca..45bc93c 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -3187,3 +3187,43 @@ void helper_xvxsigsp(CPUPPCState *env, uint32_t opcode)
 }
 putVSR(xT(opcode), &xt, env);
 }
+
+/* VSX_TEST_DC - VSX floating point test data class
+ *   op- instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   xbn   - VSR register number
+ *   tp- type (float32 or float64)
+ *   fld   - vsr_t field (VsrD(*) or VsrW(*))
+ *   tfld   - target vsr_t field (VsrD(*) or VsrW(*))
+ *   fld_max - target field max
+ */
+#define VSX_TEST_DC(op, nels, xbn, tp, fld, tfld, fld_max)  \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{   \
+ppc_vsr_t xt, xb;   \
+uint32_t i, sign, dcmx; \
+uint32_t match = 0; \
+\
+getVSR(xbn, &xb, env);  \
+memset(&xt, 0, sizeof(xt)); \
+dcmx = DCMX_XV(opcode); \
+\
+for (i = 0; i < nels; i++) {\
+sign = tp##_is_neg(xb.fld); \
+if (tp##_is_any_nan(xb.fld)) {  \
+match = extract32(dcmx, 6, 1);  \
+} else if (tp##_is_infinity(xb.fld)) {  \
+match = extract32(dcmx, 4 + !sign, 1);  \
+} else if (tp##_is_zero(xb.fld)) {  \
+match = extract32(dcmx, 2 + !sign, 1);  \
+} else if (tp##_is_zero_or_denormal(xb.fld)) {  \
+match = extract32(dcmx, 0 + !sign, 1);  \
+}   \
+xt.tfld = match ? fld_max : 0;  \
+match = 0;  \
+}   \
+putVSR(xT(opcode), &xt, env);   \
+}
+
+VSX_TEST_DC(xvtstdcdp, 2, xB(opcode), float64, VsrD(i), VsrD(i), UINT64_MAX)
+VSX_TEST_DC(xvtstdcsp, 4, xB(opcode), float32, VsrW(i), VsrW(i), UINT32_MAX)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 9d4ed08..165e4a5 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -546,6 +546,8 @@ DEF_HELPER_2(xvcvsxdsp, void, env, i32)
 DEF_HELPER_2(xvcvuxdsp, void, env, i32)
 DEF_HELPER_2(xvcvsxwsp, void, env, i32)
 DEF_HELPER_2(xvcvuxwsp, void, env, i32)
+DEF_HELPER_2(xvtstdcsp, void, env, i32)
+DEF_HELPER_2(xvtstdcdp, void, env, i32)
 DEF_HELPER_2(xvrspi, void, env, i32)
 DEF_HELPER_2(xvrspic, void, env, i32)
 DEF_HELPER_2(xvrspim, void, env, i32)
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index c22d74e..4c3811a 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -68,7 +68,7 @@ static inline uint32_t name(uint32_t opcode)  
\
 ((opcode >> (shift2)) & ((1 << (nb2)) - 1));  \
 }
 
-#define EXTRACT_HELPER_DXFORM(name,   \
+#define EXTRACT_HELPER_SPLIT_3(name,  \
   d0_bits, shift_op_d0, shift_d0, \
   d1_bits, shift_op_d1, shift_d1, \
   d2_bits, shift_op_d2, shift_d2) \
@@ -156,7 +156,7 @@ EXTRACT_HELPER(FPFLM, 17, 8);
 EXTRACT_HELPER(FPW, 16, 1);
 
 /* addpcis */
-EXTRACT_HELPER_DXFORM(DX, 10, 6, 6, 5, 16, 1, 1, 0, 0)
+EXTRACT_HELPER_SPLIT_3(DX, 10, 6, 6, 5, 16, 1, 1, 0, 0)
 #if defined(TARGET_PPC64)
 /* darn */
 EXTRACT_HELPER(L, 16, 2);
@@ -198,6 +198,7 @@ EXTRACT_HELPER(UIM, 16, 2);
 EXTRACT_HELPER(SHW, 8, 2);
 EXTRACT_HELPER(SP, 19, 2);
 EXTRACT_HELPER(IMM8, 11, 8);
+EXTRACT_HELPER_SPLIT_3(DCMX_XV, 5, 16, 0, 1, 2, 5, 1, 6, 6);
 
 typedef union _ppc_vsr_t {
 uint8_t u8[16];
diff --git a/target/ppc/translate/vsx-impl.inc.c 
b/target/ppc/translate/vsx-impl.inc.c
index 9bcc5af..adb6fc7 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -928,6 +928,8 @@ GEN_VSX_HELPER_2(xvrspic, 0x16, 0x0A, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX)
 GEN

Re: [Qemu-devel] [PATCH RFC v3 04/14] intel_iommu: fix trace for inv desc handling

2017-01-13 Thread Jason Wang




On 2017年01月13日 17:13, Peter Xu wrote:

On Fri, Jan 13, 2017 at 03:46:31PM +0800, Jason Wang wrote:


On 2017年01月13日 11:06, Peter Xu wrote:

VT-d codes are still using static DEBUG_INTEL_IOMMU macro. That's not
good, and we should end the day when we need to recompile the code
before getting useful debugging information for vt-d. Time to switch to
the trace system.

This is the first patch to do it.

Generally, the rule of mine is:

- for the old GENERAL typed message, I use error_report() directly if
   apply. Those are something shouldn't happen, and we should print those
   errors in all cases, even without enabling debug and tracing.

Looks like some were guest trigger-able. If yes, let's try not use
error_report() for not being flooded.

Yes, it's intended. Most of the error_report()s in this patch can be
triggered by guest, but only by illegal guest behaviors (e.g.,
non-zero reserved fields, or illegal descriptors, etc.). In that
sense, shall we keep them even guest can trigger them? Since people
will never see them if they are running generic and good kernels. More
importantly, these error_report()s can be good hints when guest
encounters issues, for better debugging and triaging.

Actually we have such usage in existing QEMU as well. For example,
when we maintain the DMA mapping in vfio-pci, it's possible that the
shadow page table is mapped illegally due to some reason (that depends
on the guest as well, may not be guest kernel, but DPDK applications
inside guest), and the map() can fail. Here we have:

 ret = vfio_dma_map(container, iova,
 iotlb->addr_mask + 1, vaddr,
 !(iotlb->perm & IOMMU_WO) || mr->readonly);
 if (ret) {
 error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
 "0x%"HWADDR_PRIx", %p) = %d (%m)",
 container, iova,
 iotlb->addr_mask + 1, vaddr, ret);
 }

Which I think is playing the same role here - we will never see these
lines if the guest is normal, and these lines will be useful when bad
things happen.

So I would slightly prefer that we keep these error_reports() for now,
as long as they won't flush the screen for most of the users. (during
the time I played with this series, none of them jumped out :)


I think the point is just surviving from malicious guests. So we need 
avoid guest trigger-able thing likes this, consider if we redirect 
stderr to a log file, malicious guest may exhaust disk space which is a 
DOS. So we'd better avoid them.


Thanks



Thanks,

-- peterx

Re: [Qemu-devel] [PATCH RFC v3 07/14] memory: add section range info for IOMMU notifier

2017-01-13 Thread Jason Wang




On 2017年01月13日 17:23, Peter Xu wrote:

On Fri, Jan 13, 2017 at 03:55:22PM +0800, Jason Wang wrote:


On 2017年01月13日 11:06, Peter Xu wrote:

In this patch, IOMMUNotifier.{start|end} are introduced to store section
information for a specific notifier. When notification occurs, we not
only check the notification type (MAP|UNMAP), but also check whether the
notified iova is in the range of specific IOMMU notifier, and skip those
notifiers if not in the listened range.

When removing an region, we need to make sure we removed the correct
VFIOGuestIOMMU by checking the IOMMUNotifier.start address as well.

Suggested-by: David Gibson 
Reviewed-by: David Gibson 
Acked-by: Paolo Bonzini 
Signed-off-by: Peter Xu 
---
  hw/vfio/common.c  | 7 ++-
  include/exec/memory.h | 3 +++
  memory.c  | 4 +++-
  3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 801578b..6f648da 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -455,6 +455,10 @@ static void vfio_listener_region_add(MemoryListener 
*listener,
  giommu->container = container;
  giommu->n.notify = vfio_iommu_map_notify;
  giommu->n.notifier_flags = IOMMU_NOTIFIER_ALL;
+giommu->n.start = section->offset_within_region;
+llend = int128_add(int128_make64(giommu->n.start), section->size);
+llend = int128_sub(llend, int128_one());
+giommu->n.end = int128_get64(llend);
  QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
  memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
@@ -525,7 +529,8 @@ static void vfio_listener_region_del(MemoryListener 
*listener,
  VFIOGuestIOMMU *giommu;
  QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
-if (giommu->iommu == section->mr) {
+if (giommu->iommu == section->mr &&
+giommu->n.start == section->offset_within_region) {
  memory_region_unregister_iommu_notifier(giommu->iommu,
  &giommu->n);
  QLIST_REMOVE(giommu, giommu_next);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index bec9756..7649e74 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -84,6 +84,9 @@ typedef enum {
  struct IOMMUNotifier {
  void (*notify)(struct IOMMUNotifier *notifier, IOMMUTLBEntry *data);
  IOMMUNotifierFlag notifier_flags;
+/* Notify for address space range start <= addr <= end */
+hwaddr start;
+hwaddr end;
  QLIST_ENTRY(IOMMUNotifier) node;
  };
  typedef struct IOMMUNotifier IOMMUNotifier;
diff --git a/memory.c b/memory.c
index 2bfc37f..e88bb54 100644
--- a/memory.c
+++ b/memory.c
@@ -1671,7 +1671,9 @@ void memory_region_notify_iommu(MemoryRegion *mr,
  }
  QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
-if (iommu_notifier->notifier_flags & request_flags) {
+if (iommu_notifier->notifier_flags & request_flags &&
+iommu_notifier->start <= entry.iova &&
+iommu_notifier->end >= entry.iova) {
  iommu_notifier->notify(iommu_notifier, &entry);
  }
  }

This seems breaks vhost device IOTLB. How about keep the the behavior
somehow?

Thanks to point out. How about I squash this into this patch?

8<
diff --git a/memory.c b/memory.c
index e88bb54..6de02dd 100644
--- a/memory.c
+++ b/memory.c
@@ -1608,8 +1608,14 @@ void memory_region_register_iommu_notifier(MemoryRegion 
*mr,
  return;
  }
  
+if (n->start == 0 && n->end == 0) {

+/* If these are not specified, we listen to the whole range */
+n->end = (hwaddr)(-1);
+}
+
  /* We need to register for at least one bitfield */
  assert(n->notifier_flags != IOMMU_NOTIFIER_NONE);
+assert(n->start <= n->end);
  QLIST_INSERT_HEAD(&mr->iommu_notify, n, node);
  memory_region_update_iommu_notify_flags(mr);
  }
>8

-- peterx


This should work, or you can introduce a 
memory_region_iommu_notifier_init() to force user to explicitly 
initialize start and end.


Thanks

Re: [Qemu-devel] [PULL 00/33] Misc patches for 2017-01-11

2017-01-13 Thread Peter Maydell

On 11 January 2017 at 19:34, Paolo Bonzini  wrote:
> The following changes since commit 41a0e54756a9ae6b60be34bb33302a7e085fdb07:
>
>   Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging 
> (2017-01-10 10:46:21 +)
>
> are available in the git repository at:
>
>
>   git://github.com/bonzini/qemu.git tags/for-upstream
>
> for you to fetch changes up to af89d1b204f03e10ade3b61365c7e5c06805caee:
>
>   Revert "win32: don't run subprocess tests on Mingw32 platform" (2017-01-11 
> 20:13:12 +0100)
>
> 
> * QOM interface fix (Eduardo)
> * RTC fixes (Gaohuai, Igor)
> * Memory leak fixes (Li Qiang, me)
> * Ctrl-a b regression (Marc-André)
> * Stubs cleanups and fixes (Leif, me)
> * hxtool tweak (me)
> * HAX support (Vincent)
> * QemuThread, exec.c, target-i386 and SCSI fixes (Roman, Xinhua, me)
>
> 

This failed to build on ppc64be:

/home/pm215/qemu/target/ppc/kvm.c: In function ‘kvm_handle_debug’:
/home/pm215/qemu/target/ppc/kvm.c:1724:9: error: implicit declaration
of function ‘cpu_synchronize_s
tate’ [-Werror=implicit-function-declaration]
 cpu_synchronize_state(cs);
 ^

In file included from /home/pm215/qemu/linux-headers/linux/kvm.h:13:0,
 from /home/pm215/qemu/include/sysemu/kvm.h:23,
 from /home/pm215/qemu/include/sysemu/hw_accel.h:16,
 from /home/pm215/qemu/target/ppc/translate_init.c:27,
 from /home/pm215/qemu/target/ppc/translate.c:6786:
/home/pm215/qemu/build/all/linux-headers/asm/kvm.h:320:0: error:
"KVM_INTERRUPT_SET" redefined [-Wer
ror]
 #define KVM_INTERRUPT_SET -1U
 ^
In file included from /home/pm215/qemu/target/ppc/translate_init.c:24:0,
 from /home/pm215/qemu/target/ppc/translate.c:6786:
/home/pm215/qemu/target/ppc/kvm_ppc.h:319:0: note: this is the
location of the previous definition
 #define KVM_INTERRUPT_SET -1
 ^
In file included from /home/pm215/qemu/linux-headers/linux/kvm.h:13:0,
 from /home/pm215/qemu/include/sysemu/kvm.h:23,
 from /home/pm215/qemu/include/sysemu/hw_accel.h:16,
 from /home/pm215/qemu/target/ppc/translate_init.c:27,
 from /home/pm215/qemu/target/ppc/translate.c:6786:
/home/pm215/qemu/build/all/linux-headers/asm/kvm.h:321:0: error:
"KVM_INTERRUPT_UNSET" redefined [-Werror]
 #define KVM_INTERRUPT_UNSET -2U
 ^
In file included from /home/pm215/qemu/target/ppc/translate_init.c:24:0,
 from /home/pm215/qemu/target/ppc/translate.c:6786:
/home/pm215/qemu/target/ppc/kvm_ppc.h:323:0: note: this is the
location of the previous definition
 #define KVM_INTERRUPT_UNSET -2
 ^
In file included from /home/pm215/qemu/linux-headers/linux/kvm.h:13:0,
 from /home/pm215/qemu/include/sysemu/kvm.h:23,
 from /home/pm215/qemu/include/sysemu/hw_accel.h:16,
 from /home/pm215/qemu/target/ppc/translate_init.c:27,
 from /home/pm215/qemu/target/ppc/translate.c:6786:
/home/pm215/qemu/build/all/linux-headers/asm/kvm.h:322:0: error:
"KVM_INTERRUPT_SET_LEVEL" redefined [-Werror]
 #define KVM_INTERRUPT_SET_LEVEL -3U
 ^
In file included from /home/pm215/qemu/target/ppc/translate_init.c:24:0,
 from /home/pm215/qemu/target/ppc/translate.c:6786:
/home/pm215/qemu/target/ppc/kvm_ppc.h:327:0: note: this is the
location of the previous definition
 #define KVM_INTERRUPT_SET_LEVEL -3
 ^


thanks
-- PMM

Re: [Qemu-devel] [PULL 0/4] migration: QTAILQ migration

2017-01-13 Thread Peter Maydell

On 13 January 2017 at 03:37, Amit Shah  wrote:
> On (Fri) 06 Jan 2017 [12:10:22], Peter Maydell wrote:
>> On 5 January 2017 at 16:32, Amit Shah  wrote:
>> > The following changes since commit 
>> > dbe2b65566e76d3c3a0c3358285c0336ac61e757:
>> >
>> >   Merge remote-tracking branch 
>> > 'remotes/vivier/tags/m68k-for-2.9-pull-request' into staging (2016-12-28 
>> > 17:11:11 +)
>> >
>> > are available in the git repository at:
>> >
>> >   https://git.kernel.org/pub/scm/virt/qemu/amit/migration.git 
>> > tags/migration-for-2.9-1
>> >
>> > for you to fetch changes up to 8d0c57697abcc57166e918064d9a7f6c7316d01b:
>> >
>> >   migration: add error_report (2017-01-04 19:21:26 +0530)
>> >
>> > 
>> > Migration: migrate QTAILQ infrastructure for vmstate
>> >
>> > 
>>
>> Hi. I'm afraid this fails 'make check' for ppc64be:
>
> Hi Peter, since this was due to something else, will you try to pull
> this in again?  Or should I re-send the series?

I've put it back on my list to process.

thanks
-- PMM

Re: [Qemu-devel] [PATCH] Further tidy-up on block status

2017-01-13 Thread Vladimir Sementsov-Ogievskiy


12.01.2017 16:11, Alex Bligh wrote:

On 12 Jan 2017, at 07:05, Vladimir Sementsov-Ogievskiy 
 wrote:

Yes this is better. But is it actually needed to force contexts have some safe 
default? If context wants it may define such default without this requirement.. 
So, should it be requirement at all?

I've changed this to:

 of the file), a server MAY reply with a single block status
 descriptor with *length* matching the requested length, rather than
 reporting the error; in this case the context MAY mandate the
 status returned.




Hmm, I don't understand. So, it MAY mandate and therefore MAY NOT do it? 
And what client should think, if server replies with one chunk matching 
the request length and not mandate the status?



--
Best regards,
Vladimir

Re: [Qemu-devel] [PATCH v3] monitor: Fix crashes when using HMP commands without CPU

2017-01-13 Thread Markus Armbruster

Markus Armbruster  writes:

> Thomas Huth  writes:
>
>> On 12.01.2017 17:22, Markus Armbruster wrote:
>>> Thomas Huth  writes:
>>> 
 When running certain HMP commands ("info registers", "info cpustats",
 "nmi", "memsave" or dumping virtual memory) with the "none" machine,
 QEMU crashes with a segmentation fault. This happens because the "none"
 machine does not have any CPUs by default, but these HMP commands did
 not check for a valid CPU pointer yet. Add such checks now, so we get
 an error message about the missing CPU instead.

 Reviewed-by: Dr. David Alan Gilbert 
 Signed-off-by: Thomas Huth 
 ---
  v3:
  - Use UNASSIGNED_CPU_INDEX instead of hard-coded -1
  v2:
  - Added more checks to cover "nmi" and "memsave", too

  hmp.c |  8 +++-
  monitor.c | 37 +++--
  2 files changed, 38 insertions(+), 7 deletions(-)

 diff --git a/hmp.c b/hmp.c
 index b869617..b1c503a 100644
 --- a/hmp.c
 +++ b/hmp.c
 @@ -1013,8 +1013,14 @@ void hmp_memsave(Monitor *mon, const QDict *qdict)
  const char *filename = qdict_get_str(qdict, "filename");
  uint64_t addr = qdict_get_int(qdict, "val");
  Error *err = NULL;
 +int cpu_index = monitor_get_cpu_index();
  
 -qmp_memsave(addr, size, filename, true, monitor_get_cpu_index(), 
 &err);
 +if (cpu_index < 0) {
 +monitor_printf(mon, "No CPU available\n");
 +return;
 +}
 +
 +qmp_memsave(addr, size, filename, true, cpu_index, &err);
  hmp_handle_error(mon, &err);
  }
  
 diff --git a/monitor.c b/monitor.c
 index 0841d43..17121ff 100644
 --- a/monitor.c
 +++ b/monitor.c
 @@ -1025,6 +1025,9 @@ int monitor_set_cpu(int cpu_index)
  CPUState *mon_get_cpu(void)
  {
  if (!cur_mon->mon_cpu) {
 +if (!first_cpu) {
 +return NULL;
 +}
  monitor_set_cpu(first_cpu->cpu_index);
  }
  cpu_synchronize_state(cur_mon->mon_cpu);
 @@ -1033,17 +1036,27 @@ CPUState *mon_get_cpu(void)
  
  CPUArchState *mon_get_cpu_env(void)
  {
 -return mon_get_cpu()->env_ptr;
 +CPUState *cs = mon_get_cpu();
 +
 +return cs ? cs->env_ptr : NULL;
  }
  
  int monitor_get_cpu_index(void)
  {
 -return mon_get_cpu()->cpu_index;
 +CPUState *cs = mon_get_cpu();
 +
 +return cs ? cs->cpu_index : UNASSIGNED_CPU_INDEX;
  }
  
  static void hmp_info_registers(Monitor *mon, const QDict *qdict)
  {
 -cpu_dump_state(mon_get_cpu(), (FILE *)mon, monitor_fprintf, 
 CPU_DUMP_FPU);
 +CPUState *cs = mon_get_cpu();
 +
 +if (!cs) {
 +monitor_printf(mon, "No CPU available\n");
 +return;
 +}
 +cpu_dump_state(cs, (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU);
  }
  
  static void hmp_info_jit(Monitor *mon, const QDict *qdict)
 @@ -1076,7 +1089,13 @@ static void hmp_info_history(Monitor *mon, const 
 QDict *qdict)
  
  static void hmp_info_cpustats(Monitor *mon, const QDict *qdict)
  {
 -cpu_dump_statistics(mon_get_cpu(), (FILE *)mon, &monitor_fprintf, 0);
 +CPUState *cs = mon_get_cpu();
 +
 +if (!cs) {
 +monitor_printf(mon, "No CPU available\n");
 +return;
 +}
 +cpu_dump_statistics(cs, (FILE *)mon, &monitor_fprintf, 0);
  }
  
  static void hmp_info_trace_events(Monitor *mon, const QDict *qdict)
 @@ -1235,6 +1254,12 @@ static void memory_dump(Monitor *mon, int count, 
 int format, int wsize,
  int l, line_size, i, max_digits, len;
  uint8_t buf[16];
  uint64_t v;
 +CPUState *cs = mon_get_cpu();
 +
 +if (!cs && (format == 'i' || !is_physical)) {
 +monitor_printf(mon, "Can not dump without CPU\n");
 +return;
 +}
>>> 
>>> This is basically "if (we're going to dereference cs)".  Not so nice, in
>>> particular since the dereferences are hidden inside mon_get_cpu_env()
>>> calls.  I guess it'll do.
>>> 
  
  if (format == 'i') {
  int flags = 0;
 @@ -1264,7 +1289,7 @@ static void memory_dump(Monitor *mon, int count, int 
 format, int wsize,
  flags = msr_le << 16;
  flags |= env->bfd_mach;
  #endif
 -monitor_disas(mon, mon_get_cpu(), addr, count, is_physical, 
 flags);
 +monitor_disas(mon, cs, addr, count, is_physical, flags);
  return;
  }
  
 @@ -1303,7 +1328,7 @@ static void memory_dump(Monitor *mon, int count, int 
 format, int wsize,
  if (is_physical) {
  cpu_physical_memory_read(addr, buf, l);
  } else {
 -if (cpu_memory_rw_debug(mon_get_cpu(), addr, buf, l,

Re: [Qemu-devel] [PATCH] virtio-ccw: fix ring sizing

2017-01-13 Thread Cornelia Huck

On Thu, 12 Jan 2017 23:26:22 +0200
"Michael S. Tsirkin"  wrote:

> Current code seems to assume ring size is
> always decreased but this is not required by spec:
> what spec says is just that size can not exceed
> the maximum. Fix it up.
> 
> Signed-off-by: Michael S. Tsirkin 
> ---
>  include/hw/virtio/virtio.h | 1 +
>  hw/s390x/virtio-ccw.c  | 2 +-
>  hw/virtio/virtio.c | 5 +
>  3 files changed, 7 insertions(+), 1 deletion(-)

Yes, makes sense. Queued to s390-next.

Re: [Qemu-devel] [PATCH v6 wave 2 1/3] hw/isa/lpc_ich9: add SMI feature negotiation via fw_cfg

2017-01-13 Thread Igor Mammedov

On Thu, 12 Jan 2017 19:24:44 +0100
Laszlo Ersek  wrote:

> Introduce the following fw_cfg files:
> 
> - "etc/smi/supported-features": a little endian uint64_t feature bitmap,
>   presenting the features known by the host to the guest. Read-only for
>   the guest.
> 
>   The content of this file will be determined via bit-granularity ICH9-LPC
>   device properties, to be introduced later. For now, the bitmask is left
>   zeroed. The bits will be set from machine type compat properties and on
>   the QEMU command line, hence this file is not migrated.
> 
> - "etc/smi/requested-features": a little endian uint64_t feature bitmap,
>   representing the features the guest would like to request. Read-write
>   for the guest.
> 
>   The guest can freely (re)write this file, it has no direct consequence.
>   Initial value is zero. A nonzero value causes the SMI-related fw_cfg
>   files and fields that are under guest influence to be migrated.
> 
> - "etc/smi/features-ok": contains a uint8_t value, and it is read-only for
>   the guest. When the guest selects the associated fw_cfg key, the guest
>   features are validated against the host features. In case of error, the
>   negotiation doesn't proceed, and the "features-ok" file remains zero. In
>   case of success, the "features-ok" file becomes (uint8_t)1, and the
>   negotiated features are locked down internally (to which no further
>   changes are possible until reset).
> 
>   The initial value is zero.  A nonzero value causes the SMI-related
>   fw_cfg files and fields that are under guest influence to be migrated.
I'm still not quite sure if we need all this negotiation thingy with
all complexity it brings in when looking from cpu hotplug pov.

Paolo mentioned following security implications:
 1: OS could trigger broadcast SMI and try to hijack SMI handler for not
yet relocated default SMI base of hotplugged CPU.
That [c|sh]ould be handled by firmware protecting default SMI base.
 2: Even if firmware protected default SMI base, OS still could
cause undefined behavior by sending broadcast SMI in case if more than
1 CPU has been hotplugged, which would make unconfigured CPUs
use the same SMI base simultaneously.
Paolo suggested that real HW avoids the issue by making hotplugged CPUs
"parked" until firmware unparks it from its SMI handler.
So that's adds one more runtime state to migrate and qemu-guest ABI knob
to maintain even if we ignore that there is no such terms as '(un)parked' 
in SDM.

How about considering an alternative simpler approach:
 * QEMU provides only "etc/smi/supported-features" file with SMI broadcast
   (no need to migrate)
 * firmware takes care of #1 by protecting default SMI base and using
   broadcast SMI if "etc/smi/supported-features" advertises it.
 * and QEMU could deal with #2 issue by just crashing guest as it tries
   to invoke undefined behavior (i.e. check that there is only 1 CPU with
   default SMI base and crash if there are more).
With this approach there is not need to negotiate nor migrate extra state
and inventing an unSPECed unpark knob for CPU hotplug could be avoided
(i.e. less qemu-guest ABI to maintain).


> 
> The C-language fields backing the "supported-features" and
> "requested-features" files are uint8_t arrays. This is because they carry
> guest-side representation (our choice is little endian), while
> VMSTATE_UINT64() assumes / implies host-side endianness for any uint64_t
> fields. If we migrate a guest between hosts with different endiannesses
> (which is possible with TCG), then the host-side value is preserved, and
> the host-side representation is translated. This would be visible to the
> guest through fw_cfg, unless we used plain byte arrays. So we do.
> 
> Cc: "Michael S. Tsirkin" 
> Cc: Gerd Hoffmann 
> Cc: Igor Mammedov 
> Cc: Paolo Bonzini 
> Signed-off-by: Laszlo Ersek 
> Reviewed-by: Michael S. Tsirkin 
> ---
> 
> Notes:
> v6:
> - no changes, pick up Michael's R-b
> 
> v5:
> - rename the "etc/smi/host-features" fw_cfg file to
>   "etc/smi/supported-features" [Igor]
> 
> - rename the "etc/smi/guest-features" fw_cfg file to
>   "etc/smi/requested-features" [Igor]
> 
> - suffix the names of the "ICH9LPCState.smi_host_features" and
>   "ICH9LPCState.smi_guest_features" array fields with "_le" for
>   representing their guest-visible encoding [Igor]
> 
> - Replace the "smi_host_features" parameter of ich9_lpc_pm_init() --
>   which was meant in v4 to be set by  board code -- with a new
>   "ICH9LPCState.smi_host_features" field, of type uint64_t.
>   Bit-granularity ICH9-LPC device properties will be carved out of this
>   field. [Igor]
> 
> - Given the "ICH9LPCState.smi_host_features" uint64_t field, we can now
>   use that directly for feature validation in
>   smi_features_ok_callback(). Converting the (otherwise guest-read-only)
>   "ICH9LPCState.smi_host_features_le"

Re: [Qemu-devel] [PATCH RFC v3 07/14] memory: add section range info for IOMMU notifier

2017-01-13 Thread Peter Xu

On Fri, Jan 13, 2017 at 05:37:43PM +0800, Jason Wang wrote:

[...]

> >>>diff --git a/memory.c b/memory.c
> >>>index 2bfc37f..e88bb54 100644
> >>>--- a/memory.c
> >>>+++ b/memory.c
> >>>@@ -1671,7 +1671,9 @@ void memory_region_notify_iommu(MemoryRegion *mr,
> >>>  }
> >>>  QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
> >>>-if (iommu_notifier->notifier_flags & request_flags) {
> >>>+if (iommu_notifier->notifier_flags & request_flags &&
> >>>+iommu_notifier->start <= entry.iova &&
> >>>+iommu_notifier->end >= entry.iova) {
> >>>  iommu_notifier->notify(iommu_notifier, &entry);
> >>>  }
> >>>  }
> >>This seems breaks vhost device IOTLB. How about keep the the behavior
> >>somehow?
> >Thanks to point out. How about I squash this into this patch?
> >
> >8<
> >diff --git a/memory.c b/memory.c
> >index e88bb54..6de02dd 100644
> >--- a/memory.c
> >+++ b/memory.c
> >@@ -1608,8 +1608,14 @@ void 
> >memory_region_register_iommu_notifier(MemoryRegion *mr,
> >  return;
> >  }
> >+if (n->start == 0 && n->end == 0) {
> >+/* If these are not specified, we listen to the whole range */
> >+n->end = (hwaddr)(-1);
> >+}
> >+
> >  /* We need to register for at least one bitfield */
> >  assert(n->notifier_flags != IOMMU_NOTIFIER_NONE);
> >+assert(n->start <= n->end);
> >  QLIST_INSERT_HEAD(&mr->iommu_notify, n, node);
> >  memory_region_update_iommu_notify_flags(mr);
> >  }
> >>8
> >
> >-- peterx
> 
> This should work, or you can introduce a memory_region_iommu_notifier_init()
> to force user to explicitly initialize start and end.

Hmm, this sounds better, considering that IOMMUNotifier is getting
more fields to be inited. Thanks,

-- peterx

Re: [Qemu-devel] [PATCH] Further tidy-up on block status

2017-01-13 Thread Alex Bligh

> On 13 Jan 2017, at 09:48, Vladimir Sementsov-Ogievskiy 
>  wrote:
> 
> 12.01.2017 16:11, Alex Bligh wrote:
>>> On 12 Jan 2017, at 07:05, Vladimir Sementsov-Ogievskiy 
>>>  wrote:
>>> 
>>> Yes this is better. But is it actually needed to force contexts have some 
>>> safe default? If context wants it may define such default without this 
>>> requirement.. So, should it be requirement at all?
>> I've changed this to:
>> 
>> of the file), a server MAY reply with a single block status
>> descriptor with *length* matching the requested length, rather than
>> reporting the error; in this case the context MAY mandate the
>> status returned.
>> 
>> 
> 
> Hmm, I don't understand. So, it MAY mandate and therefore MAY NOT do it? And 
> what client should think, if server replies with one chunk matching the 
> request length and not mandate the status?

Some contexts may mandate a particular value (so for instance the allocation 
context might mandate 0).

Some contexts may not mandate a particular value, in which case the 
interpretation is dependent upon the context (just like any other status 
value). EG a context which returned an status of 7 if the range contained a 
prime number, and else 3, could carry on doing that.

As it doesn't make sense to interpret status returns without an understanding 
of the particular context, we might as well simply extend this to 'beyond the 
range' returns - as I think you pointed out!

-- 
Alex Bligh

Re: [Qemu-devel] [PATCH RFC v3 2/2] block/qapi: reduce the execution time of qmp_query_blockstats

2017-01-13 Thread Markus Armbruster

Dou Liyang  writes:

> In order to reduce the execution time, this patch optimize
> the qmp_query_blockstats():
> Remove the next_query_bds function.
> Remove the bdrv_query_stats function.
> Remove some judgement sentence.
>
> The original qmp_query_blockstats calls next_query_bds to get
> the next objects in each loops. In the next_query_bds, it checks
> the query_nodes and blk. It also call bdrv_query_stats to get
> the stats, In the bdrv_query_stats, it checks blk and bs each
> times. This waste more times, which may stall the main loop a
> bit. And if the disk is too many and donot use the dataplane
> feature, this may affect the performance in main loop thread.
>
> This patch removes that two functions, and makes the structure
> clearly.
>
> Signed-off-by: Dou Liyang 
> ---
>  block/qapi.c | 72 
> +++-
>  1 file changed, 28 insertions(+), 44 deletions(-)
>
> diff --git a/block/qapi.c b/block/qapi.c
> index bc622cd..6e1e8bd 100644
> --- a/block/qapi.c
> +++ b/block/qapi.c
> @@ -456,23 +456,6 @@ static BlockStats *bdrv_query_bds_stats(const 
> BlockDriverState *bs,
>  return s;
>  }
>  
> -static BlockStats *bdrv_query_stats(BlockBackend *blk,
> -const BlockDriverState *bs,
> -bool query_backing)
> -{
> -BlockStats *s;
> -
> -s = bdrv_query_bds_stats(bs, query_backing);
> -
> -if (blk) {
> -s->has_device = true;
> -s->device = g_strdup(blk_name(blk));
> -bdrv_query_blk_stats(s->stats, blk);
> -}
> -
> -return s;
> -}
> -
>  BlockInfoList *qmp_query_block(Error **errp)
>  {
>  BlockInfoList *head = NULL, **p_next = &head;
> @@ -496,42 +479,43 @@ BlockInfoList *qmp_query_block(Error **errp)
>  return head;
>  }
>  
> -static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
> -   bool query_nodes)
> -{
> -if (query_nodes) {
> -*bs = bdrv_next_node(*bs);
> -return !!*bs;
> -}
> -
> -*blk = blk_next(*blk);
> -*bs = *blk ? blk_bs(*blk) : NULL;
> -
> -return !!*blk;
> -}
> -
>  BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
>   bool query_nodes,
>   Error **errp)
>  {
>  BlockStatsList *head = NULL, **p_next = &head;
> -BlockBackend *blk = NULL;
> -BlockDriverState *bs = NULL;
> +BlockBackend *blk;
> +BlockDriverState *bs;
>  
>  /* Just to be safe if query_nodes is not always initialized */
> -query_nodes = has_query_nodes && query_nodes;
> -
> -while (next_query_bds(&blk, &bs, query_nodes)) {
> -BlockStatsList *info = g_malloc0(sizeof(*info));
> -AioContext *ctx = blk ? blk_get_aio_context(blk)
> -  : bdrv_get_aio_context(bs);
> +if (has_query_nodes && query_nodes) {
> +for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) {
> +BlockStatsList *info = g_malloc0(sizeof(*info));
> +AioContext *ctx = bdrv_get_aio_context(bs);
>  
> -aio_context_acquire(ctx);
> -info->value = bdrv_query_stats(blk, bs, !query_nodes);
> -aio_context_release(ctx);
> +aio_context_acquire(ctx);
> +info->value = bdrv_query_bds_stats(bs, false);
> +aio_context_release(ctx);
>  
> -*p_next = info;
> -p_next = &info->next;
> +*p_next = info;
> +p_next = &info->next;
> +}
> +} else {
> +for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
> +BlockStatsList *info = g_malloc0(sizeof(*info));
> +AioContext *ctx = blk_get_aio_context(blk);
> +
> +aio_context_acquire(ctx);
> +BlockStats *s = bdrv_query_bds_stats(blk_bs(blk), true);

Please don't put declarations after statements.  Suggest:

   BlockStats *s;

   aio_context_acquire(ctx);
   info->value = s = bdrv_query_bds_stats(blk_bs(blk), true);

> +s->has_device = true;
> +s->device = g_strdup(blk_name(blk));
> +bdrv_query_blk_stats(s->stats, blk);
> +aio_context_release(ctx);
> +
> +info->value = s;
> +*p_next = info;
> +p_next = &info->next;
> +}
>  }
>  
>  return head;

[Qemu-devel] Data corruption in Qemu 2.7.1

2017-01-13 Thread Peter Lieven

Hi,

i currently facing a problem in our testing environment where I see file system 
corruption with 2.7.1 on iSCSI and Local Storage (LVM).
Trying to bisect, but has anyone observed this before?

Thanks,
Peter

Re: [Qemu-devel] [PATCH RFC v3 0/2] block/qapi: refactor and optimize the qmp_query_blockstats()

2017-01-13 Thread Markus Armbruster

Dou Liyang  writes:

> Change log v2 -> v3:
>  1. Remove the unnecessary code for the bdrv_next_node().
>  2. Remove the change of the locking rules.
> Even if this change can improve the performance, but it may
> effect the consistency.
>
> For the multi-disks guest, we can use the dataplane feature to
> hold performance does not drop, if we execute some slow monitor
> commands, such as "info blockstats". But, without this feature, 
> How to reduce the decline in performance?
>
> These patches aim to refactor the qmp_query_blockstats() and
> improve the performance by reducing the running time of it.
>
> There are the two jobs:
>
> 1 For the performance:
>
> 1.1 the time it takes(ns) in each time:
> the disk numbers | 10| 500
> -
> before these patches | 19429 | 667722 
> after these patches  | 18536 | 627945
>
> 1.2 the I/O performance is degraded(%) during the monitor:
>
> the disk numbers | 10| 500
> -
> before these patches | 1.3   | 14.2
> after these patches  | 1.0   | 11.3
>
> used the dd command likes this to test: 
> dd if=date_1.dat of=date_2.dat conv=fsync oflag=direct bs=1k count=100k.
>
> 2 refactor qmp_query_blockstats():
>
> From:
>
> +--+  +-+
>  | 1|  | 4.  |
>  |next_query_bds|  |bdrv_query_bds_stats +---+
>  |  |  | |   |
>  +^-+  +-^---+   |
>   |  |   |
> +-+--+  ++---+   |
> | 0. |  | 2. |   |
> |qmp_query_blockstats+-->bdrv_query_stats<
> ||  ||
> ++  ++---+
>  |
>+-v---+
>| 3.  |
>|bdrv_query_blk_stats |
>| |
>+-+
>
> To:
>
> +--+
> |  |
>+v---+  |
>+--->  3.|  |
> +---+  |   |bdrv_query_bds_stats+--+
> | 1.+--+   ||
> |   +  ++
> |qmp_query_blockstats--+
> |   |  |
> +---+  |   ++
>|   | 2. |
>+--->|
>|bdrv_query_blk_stats|
>||
>++

Thanks for the picture, it helps.

Your patches make the code easier to follow, and shorter, too.  The
performance impact is unexpected.  But the code cleanup is worthwhile
even without it.  Please develop this into a non-RFC patch submission.

Re: [Qemu-devel] [PULL 00/67] ppc-for-2.9 queue 20170112

2017-01-13 Thread Peter Maydell

On 12 January 2017 at 02:02, David Gibson  wrote:
> The following changes since commit b44486dfb9447c88e4b216e730adcc780190852c:
>
>   Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20170110-1' into 
> staging (2017-01-10 14:52:34 +)
>
> are available in the git repository at:
>
>   git://github.com/dgibson/qemu.git tags/ppc-for-2.9-20170112
>
> for you to fetch changes up to 229b5b6b56db9c3e6b8f44b4b55a62fa32979f47:
>
>   ppc: Fix a warning in bcdcfz code and improve BCD_DIG_BYTE macro 
> (2017-01-12 10:21:49 +1100)
>
> 
> ppc patch queue 2017-01-12
>
> This is the first ppc pull request for qemu-2.9.  It's been a while
> coming, partly due to dealing with some problems running my usual set
> of tests.  So, there's quite a lot in here.
>
> * More POWER9 instruction implementations for TCG
> * The simpler parts of my CPU compatibility mode cleanup
> * This changes behaviour to prefer compatibility modes over
>   "raW" mode for new machine type versions
> * New "40p" machine type which is essentially a modernized and
>   cleaned up "prep".  The intention is that it will replace "prep"
>   once it has some more testing and polish.
> * Add pseries-2.9 machine type
> * Implement H_SIGNAL_SYS_RESET hypercall
> * Consolidate the two alternate CPU init paths in pseries by
>   making it always go through CPU core objects to initialize CPU
> * A handful of bugfixes and cleanups
>
> There are also some changes not strictly related to ppc code, but for
> its benefit:
>
> * Limit the pxi-expander-bridge (PXB) device to x86 guests only
>   (it's essentially a hack to work around historical x86
>   limitations)
> * Revise a number of qtests and enable them for ppc

Hi -- this fails "make check" on OSX host:

  GTESTER check-qtest-ppc64
qemu-system-ppc64: -object
memory-backend-file,id=mb1,size=1M,share,mem-path=/dev/shm/qtest-68237-4247260626:
invalid object type: memory-backend-file
Broken pipe
GTester: last random seed: R02Sc13bf53b8b3055a3c30552f67d7c263b
**
ERROR:/Users/pm215/src/qemu-for-merges/tests/libqos/pci.c:412:void
qpci_plug_device_test(const char *, const char *, uint8_t, const char
*): assertion failed: (!qdict_haskey(response, "error"))
GTester: last random seed: R02Sf3594a21c711ec941d5f4c89724ce12d
qemu-system-ppc64: -device ivshmem-plain,memdev=mb1: 'ivshmem-plain'
is not a valid device model name
Broken pipe

It looks like you've enabled the ivshmem test unconditionally,
but it should only be turned on if CONFIG_EVENTFD is true, see
how we do this for x86:

check-qtest-pci-$(CONFIG_EVENTFD) += tests/ivshmem-test$(EXESUF)
gcov-files-pci-y += hw/misc/ivshmem.c

Q: can we just enable these tests for PCI with
check-qtest-ppc64-y += $(check-qtest-pci-y)
gcov-files-ppc64-y += $(gcov-files-pci-y)

the way we do for i386/x86_64 ? Or is that doing too much
unwanted extra testing?

thanks
-- PMM

[Qemu-devel] [PATCH] ahci: advertise HOST_CAP_64

2017-01-13 Thread Ladi Prosek

The AHCI emulation code supports 64-bit addressing and should advertise this
fact in the Host Capabilities register. Both Linux and Windows drivers test
this bit to decide if the upper 32 bits of various registers may be written
to, and at least some versions of Windows have a bug where DMA is attempted
with an address above 4GB but, in the absence of HOST_CAP_64, the upper 32
bits are left unititialized which leads to a memory corruption.

Signed-off-by: Ladi Prosek 
---
 hw/ide/ahci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 3c19bda..6a17acf 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -488,7 +488,7 @@ static void ahci_reg_init(AHCIState *s)
 s->control_regs.cap = (s->ports - 1) |
   (AHCI_NUM_COMMAND_SLOTS << 8) |
   (AHCI_SUPPORTED_SPEED_GEN1 << AHCI_SUPPORTED_SPEED) |
-  HOST_CAP_NCQ | HOST_CAP_AHCI;
+  HOST_CAP_NCQ | HOST_CAP_AHCI | HOST_CAP_64;
 
 s->control_regs.impl = (1 << s->ports) - 1;
 
-- 
2.7.4

Re: [Qemu-devel] [PULL 00/67] ppc-for-2.9 queue 20170112

2017-01-13 Thread Laurent Vivier

On 13/01/2017 11:54, Peter Maydell wrote:
> Q: can we just enable these tests for PCI with
> check-qtest-ppc64-y += $(check-qtest-pci-y)
> gcov-files-ppc64-y += $(gcov-files-pci-y)
> 
> the way we do for i386/x86_64 ? Or is that doing too much
> unwanted extra testing?

For most of the devices tested in check-qtest-pci-y, the test has not
been ported to ppc64 and fails (or hangs).

This is why we don't add the full list in check-qtest-ppc64-y.

Laurent

Re: [Qemu-devel] [PATCH v6 wave 2 1/3] hw/isa/lpc_ich9: add SMI feature negotiation via fw_cfg

2017-01-13 Thread Laszlo Ersek

On 01/13/17 11:15, Igor Mammedov wrote:
> On Thu, 12 Jan 2017 19:24:44 +0100
> Laszlo Ersek  wrote:
> 
>> Introduce the following fw_cfg files:
>>
>> - "etc/smi/supported-features": a little endian uint64_t feature bitmap,
>>   presenting the features known by the host to the guest. Read-only for
>>   the guest.
>>
>>   The content of this file will be determined via bit-granularity ICH9-LPC
>>   device properties, to be introduced later. For now, the bitmask is left
>>   zeroed. The bits will be set from machine type compat properties and on
>>   the QEMU command line, hence this file is not migrated.
>>
>> - "etc/smi/requested-features": a little endian uint64_t feature bitmap,
>>   representing the features the guest would like to request. Read-write
>>   for the guest.
>>
>>   The guest can freely (re)write this file, it has no direct consequence.
>>   Initial value is zero. A nonzero value causes the SMI-related fw_cfg
>>   files and fields that are under guest influence to be migrated.
>>
>> - "etc/smi/features-ok": contains a uint8_t value, and it is read-only for
>>   the guest. When the guest selects the associated fw_cfg key, the guest
>>   features are validated against the host features. In case of error, the
>>   negotiation doesn't proceed, and the "features-ok" file remains zero. In
>>   case of success, the "features-ok" file becomes (uint8_t)1, and the
>>   negotiated features are locked down internally (to which no further
>>   changes are possible until reset).
>>
>>   The initial value is zero.  A nonzero value causes the SMI-related
>>   fw_cfg files and fields that are under guest influence to be migrated.
> I'm still not quite sure if we need all this negotiation thingy with
> all complexity it brings in when looking from cpu hotplug pov.

It's not VCPU hotplug that necessitates feature negotiation at this
point. The broadcast SMI feature is more foundational than VCPU hotplug.
Broadcast SMI is necessary for *generally* improving correctness and
performance of the edk2 SMM stack, as built into OVMF and run on QEMU,
with VCPU hotplug not even in the picture.

Summarizing the feedback from Paolo, Michael and Kevin O'Connor, the
guidance was clearly that
- we needed feature negotiation,
- it should resemble virtio,
- it should not be some ad-hoc IO port hackery, but a reusable method
  for future firmware stuff that needs negotiation.

> 
> Paolo mentioned following security implications:

Frankly, for the scope of this work, I absolutely don't care about VCPU
hotplug specifically. VCPU hotplug is a feature that will certainly
depend on the robustness and reliability of the edk2 SMM stack, as it is
currently available for use in OVMF. These patches improve that
robustness and reliability.

The only consideration for VCPU hotplug at the moment is that, should it
need some negotiable features, the fw_cfg pattern should be able to
accommodate them. That's all.

Discussing any VCPU hotplug specifics at the moment has no merit, in my
opinion. I have not seen, or run, a single line of edk2 SMM code related
to VCPU hotplug. Whatever theories we make up will hang in the air.
Meanwhile the basic SMM stack doesn't work reliably -- it needs these
patches.

>  1: OS could trigger broadcast SMI and try to hijack SMI handler for not
> yet relocated default SMI base of hotplugged CPU.
> That [c|sh]ould be handled by firmware protecting default SMI base.
>  2: Even if firmware protected default SMI base, OS still could
> cause undefined behavior by sending broadcast SMI in case if more than
> 1 CPU has been hotplugged, which would make unconfigured CPUs
> use the same SMI base simultaneously.
> Paolo suggested that real HW avoids the issue by making hotplugged CPUs
> "parked" until firmware unparks it from its SMI handler.
> So that's adds one more runtime state to migrate and qemu-guest ABI knob
> to maintain even if we ignore that there is no such terms as '(un)parked' 
> in SDM.
> 
> How about considering an alternative simpler approach:
>  * QEMU provides only "etc/smi/supported-features" file with SMI broadcast
>(no need to migrate)
>  * firmware takes care of #1 by protecting default SMI base and using
>broadcast SMI if "etc/smi/supported-features" advertises it.
>  * and QEMU could deal with #2 issue by just crashing guest as it tries
>to invoke undefined behavior (i.e. check that there is only 1 CPU with
>default SMI base and crash if there are more).
> With this approach there is not need to negotiate nor migrate extra state
> and inventing an unSPECed unpark knob for CPU hotplug could be avoided
> (i.e. less qemu-guest ABI to maintain).

The virtio-like feature negotiation (with host-features, guest-features,
and features-ok) has been part of the design since v3
.

I'm confused why you are raising such concerns now, for v6, when the
v4->v5 iteration was done mainly to add

Re: [Qemu-devel] [PATCH v3] monitor: Fix crashes when using HMP commands without CPU

2017-01-13 Thread Thomas Huth

On 13.01.2017 08:59, Markus Armbruster wrote:
> Thomas Huth  writes:
> 
>> On 12.01.2017 17:22, Markus Armbruster wrote:
>>> Thomas Huth  writes:
>>>
 When running certain HMP commands ("info registers", "info cpustats",
 "nmi", "memsave" or dumping virtual memory) with the "none" machine,
 QEMU crashes with a segmentation fault. This happens because the "none"
 machine does not have any CPUs by default, but these HMP commands did
 not check for a valid CPU pointer yet. Add such checks now, so we get
 an error message about the missing CPU instead.

 Reviewed-by: Dr. David Alan Gilbert 
 Signed-off-by: Thomas Huth 
 ---
  v3:
  - Use UNASSIGNED_CPU_INDEX instead of hard-coded -1
  v2:
  - Added more checks to cover "nmi" and "memsave", too

  hmp.c |  8 +++-
  monitor.c | 37 +++--
  2 files changed, 38 insertions(+), 7 deletions(-)

 diff --git a/hmp.c b/hmp.c
 index b869617..b1c503a 100644
 --- a/hmp.c
 +++ b/hmp.c
 @@ -1013,8 +1013,14 @@ void hmp_memsave(Monitor *mon, const QDict *qdict)
  const char *filename = qdict_get_str(qdict, "filename");
  uint64_t addr = qdict_get_int(qdict, "val");
  Error *err = NULL;
 +int cpu_index = monitor_get_cpu_index();
  
 -qmp_memsave(addr, size, filename, true, monitor_get_cpu_index(), 
 &err);
 +if (cpu_index < 0) {
 +monitor_printf(mon, "No CPU available\n");
 +return;
 +}
 +
 +qmp_memsave(addr, size, filename, true, cpu_index, &err);
  hmp_handle_error(mon, &err);
  }
  
 diff --git a/monitor.c b/monitor.c
 index 0841d43..17121ff 100644
 --- a/monitor.c
 +++ b/monitor.c
 @@ -1025,6 +1025,9 @@ int monitor_set_cpu(int cpu_index)
  CPUState *mon_get_cpu(void)
  {
  if (!cur_mon->mon_cpu) {
 +if (!first_cpu) {
 +return NULL;
 +}
  monitor_set_cpu(first_cpu->cpu_index);
  }
  cpu_synchronize_state(cur_mon->mon_cpu);
 @@ -1033,17 +1036,27 @@ CPUState *mon_get_cpu(void)
  
  CPUArchState *mon_get_cpu_env(void)
  {
 -return mon_get_cpu()->env_ptr;
 +CPUState *cs = mon_get_cpu();
 +
 +return cs ? cs->env_ptr : NULL;
  }
  
  int monitor_get_cpu_index(void)
  {
 -return mon_get_cpu()->cpu_index;
 +CPUState *cs = mon_get_cpu();
 +
 +return cs ? cs->cpu_index : UNASSIGNED_CPU_INDEX;
  }
  
  static void hmp_info_registers(Monitor *mon, const QDict *qdict)
  {
 -cpu_dump_state(mon_get_cpu(), (FILE *)mon, monitor_fprintf, 
 CPU_DUMP_FPU);
 +CPUState *cs = mon_get_cpu();
 +
 +if (!cs) {
 +monitor_printf(mon, "No CPU available\n");
 +return;
 +}
 +cpu_dump_state(cs, (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU);
  }
  
  static void hmp_info_jit(Monitor *mon, const QDict *qdict)
 @@ -1076,7 +1089,13 @@ static void hmp_info_history(Monitor *mon, const 
 QDict *qdict)
  
  static void hmp_info_cpustats(Monitor *mon, const QDict *qdict)
  {
 -cpu_dump_statistics(mon_get_cpu(), (FILE *)mon, &monitor_fprintf, 0);
 +CPUState *cs = mon_get_cpu();
 +
 +if (!cs) {
 +monitor_printf(mon, "No CPU available\n");
 +return;
 +}
 +cpu_dump_statistics(cs, (FILE *)mon, &monitor_fprintf, 0);
  }
  
  static void hmp_info_trace_events(Monitor *mon, const QDict *qdict)
 @@ -1235,6 +1254,12 @@ static void memory_dump(Monitor *mon, int count, 
 int format, int wsize,
  int l, line_size, i, max_digits, len;
  uint8_t buf[16];
  uint64_t v;
 +CPUState *cs = mon_get_cpu();
 +
 +if (!cs && (format == 'i' || !is_physical)) {
 +monitor_printf(mon, "Can not dump without CPU\n");
 +return;
 +}
>>>
>>> This is basically "if (we're going to dereference cs)".  Not so nice, in
>>> particular since the dereferences are hidden inside mon_get_cpu_env()
>>> calls.  I guess it'll do.
>>>
  
  if (format == 'i') {
  int flags = 0;
 @@ -1264,7 +1289,7 @@ static void memory_dump(Monitor *mon, int count, int 
 format, int wsize,
  flags = msr_le << 16;
  flags |= env->bfd_mach;
  #endif
 -monitor_disas(mon, mon_get_cpu(), addr, count, is_physical, 
 flags);
 +monitor_disas(mon, cs, addr, count, is_physical, flags);
  return;
  }
  
 @@ -1303,7 +1328,7 @@ static void memory_dump(Monitor *mon, int count, int 
 format, int wsize,
  if (is_physical) {
  cpu_physical_memory_read(addr, buf, l);
  } else {
 -if (cpu_memory_rw_debug(mon_get_cpu()

[Qemu-devel] [PATCH] hw/i386: check if nvdimm is enabled before plugging

2017-01-13 Thread Haozhong Zhang

The missing of 'nvdimm' in the machine type option '-M' means NVDIMM
is disabled. QEMU should refuse to plug any NVDIMM device in this case
and report the misconfiguration.

Reported-by: Stefan Hajnoczi 
Signed-off-by: Haozhong Zhang 
Message-Id: 20170112110928.GF4621@stefanha-x1.localdomain
Message-Id: 20170111093630.2088-1-stefa...@redhat.com
---
 hw/i386/pc.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 25e8586..3907609 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1715,6 +1715,11 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
 }
 
 if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
+if (!pcms->acpi_nvdimm_state.is_enabled) {
+error_setg(&local_err,
+   "nvdimm is not enabled: missing 'nvdimm' in '-M'");
+goto out;
+}
 nvdimm_plug(&pcms->acpi_nvdimm_state);
 }
 
-- 
2.10.1

Re: [Qemu-devel] [PATCH 0/2] virtio: revert virtio_queue_set_notification() nesting

2017-01-13 Thread Stefan Hajnoczi

On Thu, Jan 12, 2017 at 10:57:53AM -0600, Doug Goldstein wrote:
> On 1/12/17 5:46 AM, Stefan Hajnoczi wrote:
> > The virtio_queue_set_notification() nesting introduced for AioContext 
> > polling
> > raised an assertion with virtio-net (even in non-polling mode).  Converting
> > virtio-net and virtio-crypto to use virtio_queue_set_notification() in a
> > nesting fashion would be invasive and isn't worth it.
> > 
> > Patch 1 contains the revert to resolve the bug that Doug noticed.
> > 
> > Patch 2 is a less efficient but safe alternative.
> > 
> > Stefan Hajnoczi (2):
> >   Revert "virtio: turn vq->notification into a nested counter"
> >   virtio: disable notifications again after poll succeeded
> > 
> >  hw/virtio/virtio.c | 21 +
> >  1 file changed, 9 insertions(+), 12 deletions(-)
> > 
> 
> So I just gave this series a whirl and it fixes the assert but causes
> another issue for me. While iPXE is getting a DHCP address the screen
> immediately flashes over to the UEFI shell. Its like a timeout is
> getting hit and just dropping me to the shell.

Sounds like an separate problem.

Stefan


signature.asc
Description: PGP signature

[Qemu-devel] [PATCH v4] monitor: Fix crashes when using HMP commands without CPU

2017-01-13 Thread Thomas Huth

When running certain HMP commands ("info registers", "info cpustats",
"info tlb", "nmi", "memsave" or dumping virtual memory) with the "none"
machine, QEMU crashes with a segmentation fault. This happens because the
"none" machine does not have any CPUs by default, but these HMP commands
did not check for a valid CPU pointer yet. Add such checks now, so we get
an error message about the missing CPU instead.

Signed-off-by: Thomas Huth 
---
 v4:
 - Added some more target-specifc checks (for "info tlb" for example)
 v3:
 - Use UNASSIGNED_CPU_INDEX instead of hard-coded -1
 v2:
 - Added more checks to cover "nmi" and "memsave", too

 hmp.c   |  8 +++-
 monitor.c   | 42 ++
 target/i386/monitor.c   | 16 +++-
 target/ppc/monitor.c|  4 
 target/sh4/monitor.c|  5 +
 target/sparc/monitor.c  |  4 
 target/xtensa/monitor.c |  4 
 7 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/hmp.c b/hmp.c
index b869617..b1c503a 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1013,8 +1013,14 @@ void hmp_memsave(Monitor *mon, const QDict *qdict)
 const char *filename = qdict_get_str(qdict, "filename");
 uint64_t addr = qdict_get_int(qdict, "val");
 Error *err = NULL;
+int cpu_index = monitor_get_cpu_index();
 
-qmp_memsave(addr, size, filename, true, monitor_get_cpu_index(), &err);
+if (cpu_index < 0) {
+monitor_printf(mon, "No CPU available\n");
+return;
+}
+
+qmp_memsave(addr, size, filename, true, cpu_index, &err);
 hmp_handle_error(mon, &err);
 }
 
diff --git a/monitor.c b/monitor.c
index 0841d43..bf488e9 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1025,6 +1025,9 @@ int monitor_set_cpu(int cpu_index)
 CPUState *mon_get_cpu(void)
 {
 if (!cur_mon->mon_cpu) {
+if (!first_cpu) {
+return NULL;
+}
 monitor_set_cpu(first_cpu->cpu_index);
 }
 cpu_synchronize_state(cur_mon->mon_cpu);
@@ -1033,17 +1036,27 @@ CPUState *mon_get_cpu(void)
 
 CPUArchState *mon_get_cpu_env(void)
 {
-return mon_get_cpu()->env_ptr;
+CPUState *cs = mon_get_cpu();
+
+return cs ? cs->env_ptr : NULL;
 }
 
 int monitor_get_cpu_index(void)
 {
-return mon_get_cpu()->cpu_index;
+CPUState *cs = mon_get_cpu();
+
+return cs ? cs->cpu_index : UNASSIGNED_CPU_INDEX;
 }
 
 static void hmp_info_registers(Monitor *mon, const QDict *qdict)
 {
-cpu_dump_state(mon_get_cpu(), (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU);
+CPUState *cs = mon_get_cpu();
+
+if (!cs) {
+monitor_printf(mon, "No CPU available\n");
+return;
+}
+cpu_dump_state(cs, (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU);
 }
 
 static void hmp_info_jit(Monitor *mon, const QDict *qdict)
@@ -1076,7 +1089,13 @@ static void hmp_info_history(Monitor *mon, const QDict 
*qdict)
 
 static void hmp_info_cpustats(Monitor *mon, const QDict *qdict)
 {
-cpu_dump_statistics(mon_get_cpu(), (FILE *)mon, &monitor_fprintf, 0);
+CPUState *cs = mon_get_cpu();
+
+if (!cs) {
+monitor_printf(mon, "No CPU available\n");
+return;
+}
+cpu_dump_statistics(cs, (FILE *)mon, &monitor_fprintf, 0);
 }
 
 static void hmp_info_trace_events(Monitor *mon, const QDict *qdict)
@@ -1235,6 +1254,12 @@ static void memory_dump(Monitor *mon, int count, int 
format, int wsize,
 int l, line_size, i, max_digits, len;
 uint8_t buf[16];
 uint64_t v;
+CPUState *cs = mon_get_cpu();
+
+if (!cs && (format == 'i' || !is_physical)) {
+monitor_printf(mon, "Can not dump without CPU\n");
+return;
+}
 
 if (format == 'i') {
 int flags = 0;
@@ -1264,7 +1289,7 @@ static void memory_dump(Monitor *mon, int count, int 
format, int wsize,
 flags = msr_le << 16;
 flags |= env->bfd_mach;
 #endif
-monitor_disas(mon, mon_get_cpu(), addr, count, is_physical, flags);
+monitor_disas(mon, cs, addr, count, is_physical, flags);
 return;
 }
 
@@ -1303,7 +1328,7 @@ static void memory_dump(Monitor *mon, int count, int 
format, int wsize,
 if (is_physical) {
 cpu_physical_memory_read(addr, buf, l);
 } else {
-if (cpu_memory_rw_debug(mon_get_cpu(), addr, buf, l, 0) < 0) {
+if (cpu_memory_rw_debug(cs, addr, buf, l, 0) < 0) {
 monitor_printf(mon, " Cannot access memory\n");
 break;
 }
@@ -2186,11 +2211,12 @@ expr_error(Monitor *mon, const char *fmt, ...)
 static int get_monitor_def(target_long *pval, const char *name)
 {
 const MonitorDef *md = target_monitor_defs();
+CPUState *cs = mon_get_cpu();
 void *ptr;
 uint64_t tmp = 0;
 int ret;
 
-if (md == NULL) {
+if (cs == NULL || md == NULL) {
 return -1;
 }
 
@@ -2217,7 +2243,7 @@ static int get_monitor_def(target_long *pval, const char 
*name)
 }
 }
 
-ret = target_get_monitor_def(mon_get_cpu(), name, &tmp);

Re: [Qemu-devel] [PATCH 0/6] ppc: add a IBM 40p machine (RS/6000, PReP)

2017-01-13 Thread Mark Cave-Ayland

On 12/01/17 12:57, Hervé Poussineau wrote:

> Le 11/01/2017 à 17:58, Artyom Tarasenko a écrit :
>> Hi Hervé,
>>
>> nice work!
>>
>> On Thu, Dec 29, 2016 at 11:12 PM, Hervé Poussineau
>>  wrote:
>>> Hi,
>>>
>>> This patchset adds the emulation of the IBM RS/6000 7020 (40p). The
>>> real machine is
>>> able to run AIX (up to 4.3.3), Windows NT (up to 4.0 SP1), the beta
>>> of OS/2 PowerPC,
>>> Solaris, Linux, NetBSD/PReP ...
>>>
>>> I've tested current emulation with Open Hack'Ware, OpenBIOS and
>>> official firmware.
>>>
>>> Linux kernel starts, and freezes during boot (like with 'prep' machine).
> 
> I already saw a regression during 2.7.0 cycle with 603 CPU. However, I
> was unable to provide kernel source, so Benjamin was unable to find the
> problem.
> http://lists.gnu.org/archive/html/qemu-devel/2016-08/msg03760.html
> 
>>
>> If prep can't do it anymore, it looks like a regression. I definitely
>> remember seen a sitting penguin and a login prompt ~ 2 years ago. At
>> least with OFW.
>>
>>> Windows NT starts up to the point where it wants to change endianness.
>>
>> I hit that with Solaris/PPC a few years back as you published your
>> previous attempt. Do you know what is missing? I guess CPU endianness
>> switch emulation is working because it is used in the newer POWER
>> CPUs. Is it just the systemIO which has to be improved, or is it more?
> 
> Yes, PReP System I/O has LE flag which is not implemented.
> You may be interested by
> ftp://ftp.software.ibm.com/rs6000/technology/spec/endian.ps
> which deals about endianness switching, with some code from Windows NT/PPC
> 
>>
>>> Other OSes have not been tested.
>>>
>>> This machine is a superset of the 'prep' one, because we know exactly
>>> what is/should
>>> emulated, and that operating system list running on it is quite wide.
>>> I hope that 'prep' machine can be deprecated soon and then later
>>> removed.
>>
>> Would be nice to keep 'prep' until the 40p can boot Linux and NetBSD
>> 6.1.3 (this version used to work with -M prep last time I checked).
> 
> Some Linux kernels seem to work, some other ones seem to not work (hang
> while booting)
> I've not searched why.
> 
> I tried NetBSD 6.1.3/PReP.
> cdroms/harddisks don't boot anymore with Open Hack'Ware since some
> changes in IDE core
> Kernel boots better with 40p than with prep
> 
> 
> prep:
> 
> Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
> 2006, 2007, 2008, 2009, 2010, 2011, 2012
> The NetBSD Foundation, Inc.  All rights reserved.
> Copyright (c) 1982, 1986, 1989, 1991, 1993
> The Regents of the University of California.  All rights reserved.
> 
> NetBSD 6.1.3 (INSTALL)
> Model: Qemu
> total memory = 128 MB
> avail memory = 119 MB
> panic: call to null-ptr from 0x0
> 
> The operating system has halted.
> Please press any key to reboot.
> 
> 
> 40p/Open Hack'Ware:
> 
> Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
> 2006, 2007, 2008, 2009, 2010, 2011, 2012
> The NetBSD Foundation, Inc.  All rights reserved.
> Copyright (c) 1982, 1986, 1989, 1991, 1993
> The Regents of the University of California.  All rights reserved.
> 
> NetBSD 6.1.3 (INSTALL)
> Model: Qemu
> total memory = 128 MB
> avail memory = 119 MB
> mainbus0 (root)
> cpu0 at mainbus0: 604 (Revision 1.3), ID 0 (primary)
> cpu0: HID0 0xc084, powersave: 1
> cpu0: 0.00 MHz
> Couldn't find PNP data for bus 0 devfunc 0x0
> pnpbus0 at mainbus0
> pci0 at mainbus0 bus 0: indirect configuration space access
> pchb0 at pci0 dev 0 function 0
> pchb0: vendor 0x1057 product 0x4801 (rev. 0x00)
> siop0 at pci0 dev 1 function 0: Symbios Logic 53c810 (fast scsi)
> siop0: couldn't map interrupt
> vga0 at pci0 dev 2 function 0: vendor 0x1234 product 0x (rev. 0x02)
> wsdisplay0 at vga0 (kbdmux ignored)
> drm at vga0 not configured
> pcn0 at pci0 dev 3 function 0: AMD PCnet-PCI Ethernet
> pcn0: Am79c970A PCnet-PCI II rev 0, Ethernet address 52:54:00:12:34:56
> pcn0: unable to map interrupt
> pcib0 at pci0 dev 11 function 0: vendor 0x8086 product 0x0484 (rev. 0x03)
> isa0 at pcib0
> com0 at isa0 port 0x3f8-0x3ff irq 4: ns16550a, working fifo
> com0: console
> com1 at isa0 port 0x2f8-0x2ff irq 3: ns16550a, working fifo
> pckbc0 at isa0 port 0x60-0x64
> pckbd0 at pckbc0 (kbd slot)
> pckbc0: using irq 1 for kbd slot
> wskbd0 at pckbd0 (mux ignored)
> vmmask 1000 schedmask 1000 highmask 7000
> boot device: mainbus0
> root on md0a dumps on md0b
> root file system type: ffs
> WARNING: no TOD clock present
> WARNING: using filesystem time
> WARNING: CHECK AND RESET THE DATE!
> erase ^H, werase ^W, kill ^U, intr ^C, status ^T
> Terminal type? [vt100]
> 
> 
> 40p/official firmware with cdrom boot:
> 
> Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
> 2006, 2007, 2008, 2009, 2010, 2011,

Re: [Qemu-devel] [PATCH] hw/i386: check if nvdimm is enabled before plugging

2017-01-13 Thread Xiao Guangrong




On 01/13/2017 07:56 PM, Haozhong Zhang wrote:

The missing of 'nvdimm' in the machine type option '-M' means NVDIMM
is disabled. QEMU should refuse to plug any NVDIMM device in this case
and report the misconfiguration.


Thanks for your fix.

Reviewed-by: Xiao Guangrong

Re: [Qemu-devel] [PATCH v3 3/3] cputlb: drop flush_global flag from tlb_flush

2017-01-13 Thread Alex Bennée


Richard Henderson  writes:

> On 01/12/2017 08:03 AM, Alex Bennée wrote:
>> And I immediately realise I missed out:
>>
>>> Signed-off-by: Alex Bennée 
>>> Reviewed-by: Richard Henderson 
>> [DG: ppc portions]
>> Acked-by: David Gibson 
>>
>> Do you want me to re-post or can you apply when you take the patches?
>>
>
> I also don't mind if you send the pull request.

If Peter is OK with that?

In that case I now have a few more tags to add and I can roll a PULL REQ.

--
Alex Bennée

[Qemu-devel] system_clock_scale unset in hw/intc/armv7m_nvic.c

2017-01-13 Thread James Hanley

How should system_clock_scale be set in hw/intc/armv7m_nvic.c - there is no
API for the global, and I've defaulted the global to SYSTICK_SCALE which
seems to work, but is not obvious.  This was needed as this value was set
to zero and the arm firmware was selecting the external clock source
causing zero to be returned from systick_scale() and the interrupt to run
continuously.

diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 06d8db6..c15841e 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -70,7 +70,7 @@ static const uint8_t nvic_id[] = {
 #define SYSTICK_CLKSOURCE (1 << 2)
 #define SYSTICK_COUNTFLAG (1 << 16)

-int system_clock_scale;
+int system_clock_scale = SYSTICK_SCALE;

 /* Conversion factor from qemu timer to SysTick frequencies.  */
 static inline int64_t systick_scale(nvic_state *s)

Re: [Qemu-devel] [Qemu-discuss] system_clock_scale unset in hw/intc/armv7m_nvic.c

2017-01-13 Thread Peter Maydell

On 13 January 2017 at 12:35, James Hanley  wrote:
> How should system_clock_scale be set in hw/intc/armv7m_nvic.c - there is no
> API for the global, and I've defaulted the global to SYSTICK_SCALE which
> seems to work, but is not obvious.  This was needed as this value was set to
> zero and the arm firmware was selecting the external clock source causing
> zero to be returned from systick_scale() and the interrupt to run
> continuously.

If I recall correctly, this is an external-to-the-CPU thing
(wired up by the SoC or the board), which suggests that it
ought to be a property on the CPU object (or perhaps on a
currently-nonexistent container object which has the CPU
and the NVIC and the other M profile devices).

As the code stands today, I think the answer is that the
board has to set the global -- this is what stellaris.c does.

thanks
-- PMM

Re: [Qemu-devel] [PATCH v4] monitor: Fix crashes when using HMP commands without CPU

2017-01-13 Thread Markus Armbruster

Thomas Huth  writes:

> When running certain HMP commands ("info registers", "info cpustats",
> "info tlb", "nmi", "memsave" or dumping virtual memory) with the "none"
> machine, QEMU crashes with a segmentation fault. This happens because the
> "none" machine does not have any CPUs by default, but these HMP commands
> did not check for a valid CPU pointer yet. Add such checks now, so we get
> an error message about the missing CPU instead.
>
> Signed-off-by: Thomas Huth 
> ---
>  v4:
>  - Added some more target-specifc checks (for "info tlb" for example)
>  v3:
>  - Use UNASSIGNED_CPU_INDEX instead of hard-coded -1
>  v2:
>  - Added more checks to cover "nmi" and "memsave", too
>
>  hmp.c   |  8 +++-
>  monitor.c   | 42 ++
>  target/i386/monitor.c   | 16 +++-
>  target/ppc/monitor.c|  4 
>  target/sh4/monitor.c|  5 +
>  target/sparc/monitor.c  |  4 
>  target/xtensa/monitor.c |  4 
>  7 files changed, 73 insertions(+), 10 deletions(-)
>
> diff --git a/hmp.c b/hmp.c
> index b869617..b1c503a 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -1013,8 +1013,14 @@ void hmp_memsave(Monitor *mon, const QDict *qdict)
>  const char *filename = qdict_get_str(qdict, "filename");
>  uint64_t addr = qdict_get_int(qdict, "val");
>  Error *err = NULL;
> +int cpu_index = monitor_get_cpu_index();
>  
> -qmp_memsave(addr, size, filename, true, monitor_get_cpu_index(), &err);
> +if (cpu_index < 0) {
> +monitor_printf(mon, "No CPU available\n");
> +return;
> +}
> +
> +qmp_memsave(addr, size, filename, true, cpu_index, &err);
>  hmp_handle_error(mon, &err);
>  }
>  
> diff --git a/monitor.c b/monitor.c
> index 0841d43..bf488e9 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -1025,6 +1025,9 @@ int monitor_set_cpu(int cpu_index)
>  CPUState *mon_get_cpu(void)
>  {
>  if (!cur_mon->mon_cpu) {
> +if (!first_cpu) {
> +return NULL;
> +}
>  monitor_set_cpu(first_cpu->cpu_index);
>  }
>  cpu_synchronize_state(cur_mon->mon_cpu);
> @@ -1033,17 +1036,27 @@ CPUState *mon_get_cpu(void)
>  
>  CPUArchState *mon_get_cpu_env(void)
>  {
> -return mon_get_cpu()->env_ptr;
> +CPUState *cs = mon_get_cpu();
> +
> +return cs ? cs->env_ptr : NULL;
>  }
>  
>  int monitor_get_cpu_index(void)
>  {
> -return mon_get_cpu()->cpu_index;
> +CPUState *cs = mon_get_cpu();
> +
> +return cs ? cs->cpu_index : UNASSIGNED_CPU_INDEX;
>  }
>  
>  static void hmp_info_registers(Monitor *mon, const QDict *qdict)
>  {
> -cpu_dump_state(mon_get_cpu(), (FILE *)mon, monitor_fprintf, 
> CPU_DUMP_FPU);
> +CPUState *cs = mon_get_cpu();
> +
> +if (!cs) {
> +monitor_printf(mon, "No CPU available\n");
> +return;
> +}
> +cpu_dump_state(cs, (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU);
>  }
>  
>  static void hmp_info_jit(Monitor *mon, const QDict *qdict)
> @@ -1076,7 +1089,13 @@ static void hmp_info_history(Monitor *mon, const QDict 
> *qdict)
>  
>  static void hmp_info_cpustats(Monitor *mon, const QDict *qdict)
>  {
> -cpu_dump_statistics(mon_get_cpu(), (FILE *)mon, &monitor_fprintf, 0);
> +CPUState *cs = mon_get_cpu();
> +
> +if (!cs) {
> +monitor_printf(mon, "No CPU available\n");
> +return;
> +}
> +cpu_dump_statistics(cs, (FILE *)mon, &monitor_fprintf, 0);
>  }
>  
>  static void hmp_info_trace_events(Monitor *mon, const QDict *qdict)
> @@ -1235,6 +1254,12 @@ static void memory_dump(Monitor *mon, int count, int 
> format, int wsize,
>  int l, line_size, i, max_digits, len;
>  uint8_t buf[16];
>  uint64_t v;
> +CPUState *cs = mon_get_cpu();
> +
> +if (!cs && (format == 'i' || !is_physical)) {
> +monitor_printf(mon, "Can not dump without CPU\n");
> +return;
> +}
>  
>  if (format == 'i') {
>  int flags = 0;
> @@ -1264,7 +1289,7 @@ static void memory_dump(Monitor *mon, int count, int 
> format, int wsize,
>  flags = msr_le << 16;
>  flags |= env->bfd_mach;
>  #endif
> -monitor_disas(mon, mon_get_cpu(), addr, count, is_physical, flags);
> +monitor_disas(mon, cs, addr, count, is_physical, flags);
>  return;
>  }
>  
> @@ -1303,7 +1328,7 @@ static void memory_dump(Monitor *mon, int count, int 
> format, int wsize,
>  if (is_physical) {
>  cpu_physical_memory_read(addr, buf, l);
>  } else {
> -if (cpu_memory_rw_debug(mon_get_cpu(), addr, buf, l, 0) < 0) {
> +if (cpu_memory_rw_debug(cs, addr, buf, l, 0) < 0) {
>  monitor_printf(mon, " Cannot access memory\n");
>  break;
>  }
> @@ -2186,11 +2211,12 @@ expr_error(Monitor *mon, const char *fmt, ...)
>  static int get_monitor_def(target_long *pval, const char *name)
>  {
>  const MonitorDef *md = target_monitor_defs();
> +CPUState *cs = mon_get_cpu();

Re: [Qemu-devel] [PATCH v4 0/2] Fixes/tests for hmp_object_del()

2017-01-13 Thread Markus Armbruster

Andreas, please have a look.  Feel free to ask me to take it through my
tree.

Michael Roth  writes:

> hmp_object_del() followed by a subsequent hmp_object_add() can trigger a
> duplicate ID error if the previous object shared the same ID and was added
> via the command-line. Please see patch 2/2 for more details.
>
> This patchset fixes the issue in question and adds some general unit tests
> for object created via -object, which we later extend to verify the fix in
> question.
>
> Changes since v3:
>
>   - Fixed up comment formating (Markus)
>   - Instead of segfaulting, use &error_abort if assumptions about
> 'object' property group existence change (Markus)
>   - Use g_assert_null in place of g_assert(... == NULL) (Markus)
>
> Changes since v2:
>
>   - Moved the generic unit tests ahead of the fix patch, with a FIXME
> in place of the actual check for the failure addressed in patch
> 2/2 (Daniel/Markus)
>   - Dropped check for existence of objects' QemuOptsList (Markus)
>   - Dropped unintended whitespace removal in PATCH 1/2
>   - Slight rewording of commit messages to reflect the changes and fix
> minor grammar errors.
>
> Changes since v1:
>
>   - Moved QemuOpt cleanup out of {qmp,hmp}_object_del() and into common
> user_creatable_del() path (Daniel, David)
>   - Added corresponding test case in check-qom-proplist

[Qemu-devel] [PATCH v2 0/5] Fixes for target/m68k

2017-01-13 Thread Laurent Vivier

This is a series of fixes for target/m68k found:
- with RISU (bit operation with immediate)
- while debugging package build under chroot
  (gen_flush_flags() and CAS address modes)
- while I was working on the softmmu mode
  (CAS alignment and SP address modes)

v2:
- Don't align stack access on coldfire.

Laurent Vivier (5):
  target-m68k: fix bit operation with immediate value
  target-m68k: fix gen_flush_flags()
  target-m68k: manage pre-dec et post-inc in CAS
  target-m68k: CAS doesn't need aligned access
  target-m68k: increment/decrement with SP

 target/m68k/translate.c | 40 +---
 1 file changed, 33 insertions(+), 7 deletions(-)

-- 
2.7.4

[Qemu-devel] [PATCH v2 2/5] target-m68k: fix gen_flush_flags()

2017-01-13 Thread Laurent Vivier

gen_flush_flags() is setting unconditionally cc_op_synced to 1
and s->cc_op to CC_OP_FLAGS, whereas env->cc_op can be set
to something else by a previous tcg fragment.

We fix that by not setting cc_op_synced to 1
(except for gen_helper_flush_flags() that updates env->cc_op)

FIX: https://github.com/vivier/qemu-m68k/issues/19

Signed-off-by: Laurent Vivier 
---
 target/m68k/translate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index 410f56a..0e97900 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -595,18 +595,19 @@ static void gen_flush_flags(DisasContext *s)
 
 case CC_OP_DYNAMIC:
 gen_helper_flush_flags(cpu_env, QREG_CC_OP);
+s->cc_op_synced = 1;
 break;
 
 default:
 t0 = tcg_const_i32(s->cc_op);
 gen_helper_flush_flags(cpu_env, t0);
 tcg_temp_free(t0);
+s->cc_op_synced = 1;
 break;
 }
 
 /* Note that flush_flags also assigned to env->cc_op.  */
 s->cc_op = CC_OP_FLAGS;
-s->cc_op_synced = 1;
 }
 
 static inline TCGv gen_extend(TCGv val, int opsize, int sign)
-- 
2.7.4

[Qemu-devel] [PATCH v2 1/5] target-m68k: fix bit operation with immediate value

2017-01-13 Thread Laurent Vivier

M680x0 bit operations with an immediate value use 9 bits of the 16bit
value, while coldfire ones use only 8 bits.

Signed-off-by: Laurent Vivier 
---
 target/m68k/translate.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index 5f7357e..410f56a 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -1801,9 +1801,16 @@ DISAS_INSN(bitop_im)
 op = (insn >> 6) & 3;
 
 bitnum = read_im16(env, s);
-if (bitnum & 0xff00) {
-disas_undef(env, s, insn);
-return;
+if (m68k_feature(s->env, M68K_FEATURE_M68000)) {
+if (bitnum & 0xfe00) {
+disas_undef(env, s, insn);
+return;
+}
+} else {
+if (bitnum & 0xff00) {
+disas_undef(env, s, insn);
+return;
+}
 }
 
 SRC_EA(env, src1, opsize, 0, op ? &addr: NULL);
-- 
2.7.4

[Qemu-devel] [PATCH v2 3/5] target-m68k: manage pre-dec et post-inc in CAS

2017-01-13 Thread Laurent Vivier

In these cases we must update the address register after
the operation.

Signed-off-by: Laurent Vivier 
---
 target/m68k/translate.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index 0e97900..23e2b06 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -1963,6 +1963,15 @@ DISAS_INSN(cas)
 gen_partset_reg(opsize, DREG(ext, 0), load);
 
 tcg_temp_free(load);
+
+switch (extract32(insn, 3, 3)) {
+case 3: /* Indirect postincrement.  */
+tcg_gen_addi_i32(AREG(insn, 0), addr, opsize_bytes(opsize));
+break;
+case 4: /* Indirect predecrememnt.  */
+tcg_gen_mov_i32(AREG(insn, 0), addr);
+break;
+}
 }
 
 DISAS_INSN(cas2w)
-- 
2.7.4

[Qemu-devel] [PATCH v2 5/5] target-m68k: increment/decrement with SP

2017-01-13 Thread Laurent Vivier

On 680x0 family only.

Address Register indirect With postincrement:

When using the stack pointer (A7) with byte size data, the register
is incremented by two.

Address Register indirect With predecrement:

When using the stack pointer (A7) with byte size data, the register
is decremented by two.

Signed-off-by: Laurent Vivier 
---
 target/m68k/translate.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index cf5d8dd..727c189 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -725,7 +725,12 @@ static TCGv gen_lea_mode(CPUM68KState *env, DisasContext 
*s,
 }
 reg = get_areg(s, reg0);
 tmp = tcg_temp_new();
-tcg_gen_subi_i32(tmp, reg, opsize_bytes(opsize));
+if (reg0 == 7 && opsize == OS_BYTE &&
+m68k_feature(s->env, M68K_FEATURE_M68000)) {
+tcg_gen_subi_i32(tmp, reg, 2);
+} else {
+tcg_gen_subi_i32(tmp, reg, opsize_bytes(opsize));
+}
 return tmp;
 case 5: /* Indirect displacement.  */
 reg = get_areg(s, reg0);
@@ -801,7 +806,12 @@ static TCGv gen_ea_mode(CPUM68KState *env, DisasContext 
*s, int mode, int reg0,
 result = gen_ldst(s, opsize, reg, val, what);
 if (what == EA_STORE || !addrp) {
 TCGv tmp = tcg_temp_new();
-tcg_gen_addi_i32(tmp, reg, opsize_bytes(opsize));
+if (reg0 == 7 && opsize == OS_BYTE &&
+m68k_feature(s->env, M68K_FEATURE_M68000)) {
+tcg_gen_subi_i32(tmp, reg, 2);
+} else {
+tcg_gen_addi_i32(tmp, reg, opsize_bytes(opsize));
+}
 delay_set_areg(s, reg0, tmp, true);
 }
 return result;
-- 
2.7.4

[Qemu-devel] [PATCH v2 4/5] target-m68k: CAS doesn't need aligned access

2017-01-13 Thread Laurent Vivier

Signed-off-by: Laurent Vivier 
---
 target/m68k/translate.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index 23e2b06..cf5d8dd 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -1934,7 +1934,6 @@ DISAS_INSN(cas)
 default:
 g_assert_not_reached();
 }
-opc |= MO_ALIGN;
 
 ext = read_im16(env, s);
 
-- 
2.7.4

Re: [Qemu-devel] [PATCH v2 5/5] target-m68k: increment/decrement with SP

2017-01-13 Thread Thomas Huth

On 13.01.2017 13:52, Laurent Vivier wrote:
> On 680x0 family only.
> 
> Address Register indirect With postincrement:
> 
> When using the stack pointer (A7) with byte size data, the register
> is incremented by two.
> 
> Address Register indirect With predecrement:
> 
> When using the stack pointer (A7) with byte size data, the register
> is decremented by two.
> 
> Signed-off-by: Laurent Vivier 
> ---
>  target/m68k/translate.c | 14 --
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/target/m68k/translate.c b/target/m68k/translate.c
> index cf5d8dd..727c189 100644
> --- a/target/m68k/translate.c
> +++ b/target/m68k/translate.c
> @@ -725,7 +725,12 @@ static TCGv gen_lea_mode(CPUM68KState *env, DisasContext 
> *s,
>  }
>  reg = get_areg(s, reg0);
>  tmp = tcg_temp_new();
> -tcg_gen_subi_i32(tmp, reg, opsize_bytes(opsize));
> +if (reg0 == 7 && opsize == OS_BYTE &&
> +m68k_feature(s->env, M68K_FEATURE_M68000)) {
> +tcg_gen_subi_i32(tmp, reg, 2);
> +} else {
> +tcg_gen_subi_i32(tmp, reg, opsize_bytes(opsize));
> +}
>  return tmp;
>  case 5: /* Indirect displacement.  */
>  reg = get_areg(s, reg0);
> @@ -801,7 +806,12 @@ static TCGv gen_ea_mode(CPUM68KState *env, DisasContext 
> *s, int mode, int reg0,
>  result = gen_ldst(s, opsize, reg, val, what);
>  if (what == EA_STORE || !addrp) {
>  TCGv tmp = tcg_temp_new();
> -tcg_gen_addi_i32(tmp, reg, opsize_bytes(opsize));
> +if (reg0 == 7 && opsize == OS_BYTE &&
> +m68k_feature(s->env, M68K_FEATURE_M68000)) {
> +tcg_gen_subi_i32(tmp, reg, 2);
> +} else {
> +tcg_gen_addi_i32(tmp, reg, opsize_bytes(opsize));
> +}
>  delay_set_areg(s, reg0, tmp, true);
>  }
>  return result;
> 

Reviewed-by: Thomas Huth

Re: [Qemu-devel] [PATCH v6 wave 2 2/3] hw/isa/lpc_ich9: add broadcast SMI feature

2017-01-13 Thread Igor Mammedov

On Thu, 12 Jan 2017 19:24:45 +0100
Laszlo Ersek  wrote:

> The generic edk2 SMM infrastructure prefers
> EFI_SMM_CONTROL2_PROTOCOL.Trigger() to inject an SMI on each processor. If
> Trigger() only brings the current processor into SMM, then edk2 handles it
> in the following ways:
> 
> (1) If Trigger() is executed by the BSP (which is guaranteed before
> ExitBootServices(), but is not necessarily true at runtime), then:
> 
> (a) If edk2 has been configured for "traditional" SMM synchronization,
> then the BSP sends directed SMIs to the APs with APIC delivery,
> bringing them into SMM individually. Then the BSP runs the SMI
> handler / dispatcher.
> 
> (b) If edk2 has been configured for "relaxed" SMM synchronization,
> then the APs that are not already in SMM are not brought in, and
> the BSP runs the SMI handler / dispatcher.
> 
> (2) If Trigger() is executed by an AP (which is possible after
> ExitBootServices(), and can be forced e.g. by "taskset -c 1
> efibootmgr"), then the AP in question brings in the BSP with a
> directed SMI, and the BSP runs the SMI handler / dispatcher.
> 
> The smaller problem with (1a) and (2) is that the BSP and AP
> synchronization is slow. For example, the "taskset -c 1 efibootmgr"
> command from (2) can take more than 3 seconds to complete, because
> efibootmgr accesses non-volatile UEFI variables intensively.
> 
> The larger problem is that QEMU's current behavior diverges from the
> behavior usually seen on physical hardware, and that keeps exposing
> obscure corner cases, race conditions and other instabilities in edk2,
> which generally expects / prefers a software SMI to affect all CPUs at
> once.
> 
> Therefore introduce the "broadcast SMI" feature that causes QEMU to inject
> the SMI on all VCPUs.
> 
> While the original posting of this patch
> 
> only intended to speed up (2), based on our recent "stress testing" of SMM
> this patch actually provides functional improvements.
> 
> Cc: "Michael S. Tsirkin" 
> Cc: Gerd Hoffmann 
> Cc: Igor Mammedov 
> Cc: Paolo Bonzini 
> Signed-off-by: Laszlo Ersek 
> Reviewed-by: Michael S. Tsirkin 
> ---
> 
> Notes:
> v6:
> - no changes, pick up Michael's R-b
> 
> v5:
> - replace the ICH9_LPC_SMI_F_BROADCAST bit value with the
>   ICH9_LPC_SMI_F_BROADCAST_BIT bit position (necessary for
>   DEFINE_PROP_BIT() in the next patch)
> 
>  include/hw/i386/ich9.h |  3 +++
>  hw/isa/lpc_ich9.c  | 10 +-
>  2 files changed, 12 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h
> index da1118727146..18dcca7ebcbf 100644
> --- a/include/hw/i386/ich9.h
> +++ b/include/hw/i386/ich9.h
> @@ -250,4 +250,7 @@ Object *ich9_lpc_find(void);
>  #define ICH9_SMB_HST_D1 0x06
>  #define ICH9_SMB_HOST_BLOCK_DB  0x07
>  
> +/* bit positions used in fw_cfg SMI feature negotiation */
> +#define ICH9_LPC_SMI_F_BROADCAST_BIT0
> +
>  #endif /* HW_ICH9_H */
> diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
> index 376b7801a42c..ced6f803a4f2 100644
> --- a/hw/isa/lpc_ich9.c
> +++ b/hw/isa/lpc_ich9.c
> @@ -437,7 +437,15 @@ static void ich9_apm_ctrl_changed(uint32_t val, void 
> *arg)
>  
>  /* SMI_EN = PMBASE + 30. SMI control and enable register */
>  if (lpc->pm.smi_en & ICH9_PMIO_SMI_EN_APMC_EN) {
> -cpu_interrupt(current_cpu, CPU_INTERRUPT_SMI);
> +if (lpc->smi_negotiated_features &
> +(UINT64_C(1) << ICH9_LPC_SMI_F_BROADCAST_BIT)) {
> +CPUState *cs;
> +CPU_FOREACH(cs) {
> +cpu_interrupt(cs, CPU_INTERRUPT_SMI);
> +}
Shouldn't CPUs with default SMI base be excluded from broadcast?

> +} else {
> +cpu_interrupt(current_cpu, CPU_INTERRUPT_SMI);
> +}
>  }
>  }
>

[Qemu-devel] [PATCH 03/16] test-thread-pool: use generic AioContext infrastructure

2017-01-13 Thread Paolo Bonzini

Once the thread pool starts using aio_co_wake, it will also need
qemu_get_current_aio_context().  Make test-thread-pool create
an AioContext with qemu_init_main_loop, so that stubs/iothread.c
and tests/iothread.c can provide the rest.

Signed-off-by: Paolo Bonzini 
---
 tests/test-thread-pool.c | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c
index 8dbf66a..91b4ec5 100644
--- a/tests/test-thread-pool.c
+++ b/tests/test-thread-pool.c
@@ -6,6 +6,7 @@
 #include "qapi/error.h"
 #include "qemu/timer.h"
 #include "qemu/error-report.h"
+#include "qemu/main-loop.h"
 
 static AioContext *ctx;
 static ThreadPool *pool;
@@ -224,15 +225,9 @@ static void test_cancel_async(void)
 int main(int argc, char **argv)
 {
 int ret;
-Error *local_error = NULL;
 
-init_clocks();
-
-ctx = aio_context_new(&local_error);
-if (!ctx) {
-error_reportf_err(local_error, "Failed to create AIO Context: ");
-exit(1);
-}
+qemu_init_main_loop(&error_abort);
+ctx = qemu_get_current_aio_context();
 pool = aio_get_thread_pool(ctx);
 
 g_test_init(&argc, &argv, NULL);
@@ -245,6 +240,5 @@ int main(int argc, char **argv)
 
 ret = g_test_run();
 
-aio_context_unref(ctx);
 return ret;
 }
-- 
2.9.3

[Qemu-devel] [PATCH 02/16] block-backend: allow blk_prw from coroutine context

2017-01-13 Thread Paolo Bonzini

qcow2_create2 calls this.  Do not run a nested event loop, as that
breaks when aio_co_wake tries to queue the coroutine on the co_queue_wakeup
list of the currently running one.

Signed-off-by: Paolo Bonzini 
---
 block/block-backend.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index efbf398..1177598 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -880,7 +880,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, 
uint8_t *buf,
 {
 QEMUIOVector qiov;
 struct iovec iov;
-Coroutine *co;
 BlkRwCo rwco;
 
 iov = (struct iovec) {
@@ -897,9 +896,14 @@ static int blk_prw(BlockBackend *blk, int64_t offset, 
uint8_t *buf,
 .ret= NOT_DONE,
 };
 
-co = qemu_coroutine_create(co_entry, &rwco);
-qemu_coroutine_enter(co);
-BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
+if (qemu_in_coroutine()) {
+/* Fast-path if already in coroutine context */
+co_entry(&rwco);
+} else {
+Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
+qemu_coroutine_enter(co);
+BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
+}
 
 return rwco.ret;
 }
-- 
2.9.3

[Qemu-devel] [PATCH 04/16] io: add methods to set I/O handlers on AioContext

2017-01-13 Thread Paolo Bonzini

This is in preparation for making qio_channel_yield work on
AioContexts other than the main one.

Signed-off-by: Paolo Bonzini 
---
 include/io/channel.h | 30 ++
 io/channel-command.c | 13 +
 io/channel-file.c| 11 +++
 io/channel-socket.c  | 16 +++-
 io/channel-tls.c | 12 
 io/channel-watch.c   |  6 ++
 io/channel.c | 11 +++
 7 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/include/io/channel.h b/include/io/channel.h
index 32a9470..665edd7 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -23,6 +23,7 @@
 
 #include "qemu-common.h"
 #include "qom/object.h"
+#include "block/aio.h"
 
 #define TYPE_QIO_CHANNEL "qio-channel"
 #define QIO_CHANNEL(obj)\
@@ -58,6 +59,8 @@ typedef gboolean (*QIOChannelFunc)(QIOChannel *ioc,
GIOCondition condition,
gpointer data);
 
+typedef struct QIOChannelRestart QIOChannelRestart;
+
 /**
  * QIOChannel:
  *
@@ -80,6 +83,9 @@ struct QIOChannel {
 Object parent;
 unsigned int features; /* bitmask of QIOChannelFeatures */
 char *name;
+AioContext *ctx;
+QIOChannelRestart *read_coroutine;
+QIOChannelRestart *write_coroutine;
 #ifdef _WIN32
 HANDLE event; /* For use with GSource on Win32 */
 #endif
@@ -132,6 +138,11 @@ struct QIOChannelClass {
  off_t offset,
  int whence,
  Error **errp);
+void (*io_set_aio_fd_handler)(QIOChannel *ioc,
+  AioContext *ctx,
+  IOHandler *io_read,
+  IOHandler *io_write,
+  void *opaque);
 };
 
 /* General I/O handling functions */
@@ -525,4 +536,23 @@ void qio_channel_yield(QIOChannel *ioc,
 void qio_channel_wait(QIOChannel *ioc,
   GIOCondition condition);
 
+/**
+ * qio_channel_set_aio_fd_handler:
+ * @ioc: the channel object
+ * @ctx: the AioContext to set the handlers on
+ * @io_read: the read handler
+ * @io_write: the write handler
+ * @opaque: the opaque value passed to the handler
+ *
+ * This is used internally by qio_channel_yield().  It can
+ * be used by channel implementations to forward the handlers
+ * to another channel (e.g. from #QIOChannelTLS to the
+ * underlying socket).
+ */
+void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
+AioContext *ctx,
+IOHandler *io_read,
+IOHandler *io_write,
+void *opaque);
+
 #endif /* QIO_CHANNEL_H */
diff --git a/io/channel-command.c b/io/channel-command.c
index ad25313..4000b61 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -328,6 +328,18 @@ static int qio_channel_command_close(QIOChannel *ioc,
 }
 
 
+static void qio_channel_command_set_aio_fd_handler(QIOChannel *ioc,
+AioContext *ctx,
+IOHandler *io_read,
+IOHandler *io_write,
+void *opaque)
+{
+QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
+aio_set_fd_handler(ctx, cioc->readfd, false, io_read, NULL, NULL, opaque);
+aio_set_fd_handler(ctx, cioc->writefd, false, NULL, io_write, NULL, 
opaque);
+}
+
+
 static GSource *qio_channel_command_create_watch(QIOChannel *ioc,
  GIOCondition condition)
 {
@@ -349,6 +361,7 @@ static void qio_channel_command_class_init(ObjectClass 
*klass,
 ioc_klass->io_set_blocking = qio_channel_command_set_blocking;
 ioc_klass->io_close = qio_channel_command_close;
 ioc_klass->io_create_watch = qio_channel_command_create_watch;
+ioc_klass->io_set_aio_fd_handler = qio_channel_command_set_aio_fd_handler;
 }
 
 static const TypeInfo qio_channel_command_info = {
diff --git a/io/channel-file.c b/io/channel-file.c
index e1da243..b383273 100644
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -186,6 +186,16 @@ static int qio_channel_file_close(QIOChannel *ioc,
 }
 
 
+static void qio_channel_file_set_aio_fd_handler(QIOChannel *ioc,
+AioContext *ctx,
+IOHandler *io_read,
+IOHandler *io_write,
+void *opaque)
+{
+QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
+aio_set_fd_handler(ctx, fioc->fd, false, io_read, io_write, NULL, opaque);
+}
+
 static GSource *qio_channel_file_create_watch(QIOChannel *ioc,
   GIOCondition condition)
 {
@@ -206,6 +216,7 @@ static void qio_channel_file_class_init(ObjectC

[Qemu-devel] [PATCH 00/16] aio_context_acquire/release pushdown, part 2

2017-01-13 Thread Paolo Bonzini

This series pushes down aio_context_acquire/release to the point
where we can actually reason on using different fine-grained mutexes.

The main infrastructure is introduced in patch 1.  The new API aio_co_wake
starts a coroutine with aio_context_acquire/release protection, which
requires tracking each coroutine's "home" AioContext.  aio_co_schedule
instead takes care of moving a sleeping coroutine to a different
AioContext, also ensuring that it runs under aio_context_acquire/release.
This is useful to implement bdrv_set_aio_context, as a simpler alternative
to bottom halves.  Even though one-shot BHs are already simpler than
what we had before, after this patch aio_co_wake and aio_co_schedule
save you from having to do aio_context_acquire/release explicitly.

After patch 2 and 3, which are just small preparatory changes, patches
4 to 7 provide an example of how to use the new API.  In particular patch
4 to 6 implement a new organization of coroutines in the NBD client,
which allows not blocking on partial reply header reads.

Patch 8 introduces helpers for AioContext locking in QED, which is
the most complex AIO-based driver left.  Then the actual meat of the
series runs from patch 9 to patch 13, followed by small optimizations
in patches 14 and 15.

The patches do some back and forth in adding/removing
aio_context_acquire/release calls in block/*.c but ultimately a small
number of aio_context_acquire/release pairs are added after the pushdown.
These are mostly in drivers that use external libraries (where they
actually could already be replaced by QemuMutex) and in device models.

Notably, coroutines need not care about aio_context_acquire/release.
The device models ensure that the first creation of the coroutine has
the AioContext, while aio_co_wake/aio_co_schedule do the same after
they yield.  Therefore, most of the files only need to use those two
functions instead of, respectively, qemu_coroutine_enter and
aio_bh_schedule_oneshot.

However, this is only an intermediate step which is needed because the
block layer and qemu-coroutine locks are thread-unsafe.  So the next
part will add separate locking, independent of AioContext, to block.c and
mostly block/io.c---this includes making CoMutex thread-safe.  Patch 16
therefore already documents the current locking policies block.h to
prepare for the next series.

Paolo

Paolo Bonzini (16):
  aio: introduce aio_co_schedule and aio_co_wake
  block-backend: allow blk_prw from coroutine context
  test-thread-pool: use generic AioContext infrastructure
  io: add methods to set I/O handlers on AioContext
  io: make qio_channel_yield aware of AioContexts
  nbd: do not block on partial reply header reads
  coroutine-lock: reschedule coroutine on the AioContext it was running
on
  qed: introduce qed_aio_start_io and qed_aio_next_io_cb
  aio: push aio_context_acquire/release down to dispatching
  block: explicitly acquire aiocontext in timers that need it
  block: explicitly acquire aiocontext in callbacks that need it
  block: explicitly acquire aiocontext in bottom halves that need it
  block: explicitly acquire aiocontext in aio callbacks that need it
  aio-posix: partially inline aio_dispatch into aio_poll
  async: remove unnecessary inc/dec pairs
  block: document fields protected by AioContext lock

 aio-posix.c|  60 +++-
 aio-win32.c|  30 ++
 async.c|  81 ++--
 block/blkdebug.c   |   9 +-
 block/blkreplay.c  |   2 +-
 block/block-backend.c  |  13 ++-
 block/curl.c   |  44 ++---
 block/gluster.c|   9 +-
 block/io.c |   4 +-
 block/iscsi.c  |  15 ++-
 block/linux-aio.c  |  10 +-
 block/mirror.c |  12 ++-
 block/nbd-client.c | 108 -
 block/nbd-client.h |   2 +-
 block/nfs.c|   9 +-
 block/qed-cluster.c|   2 +
 block/qed-table.c  |  12 ++-
 block/qed.c|  58 +++
 block/qed.h|   3 +
 block/sheepdog.c   |  29 +++---
 block/ssh.c|  29 ++
 block/throttle-groups.c|   2 +
 block/win32-aio.c  |   9 +-
 dma-helpers.c  |   2 +
 hw/block/virtio-blk.c  |  19 +++-
 hw/scsi/scsi-bus.c |   2 +
 hw/scsi/scsi-disk.c|  15 +++
 hw/scsi/scsi-generic.c |  20 +++-
 hw/scsi/virtio-scsi.c  |   6 ++
 include/block/aio.h|  38 +++-
 include/block/block_int.h  |  64 -
 include/io/channel.h   |  59 +++-
 include/qemu/coroutine_int.h   |  10 +-
 include/sysemu/block-backend.h |  14 ++-
 io/channel-command.c   |  13 +++
 io/channel-file.c  |  11 +++
 io/channel-socket.c|  16 +++-
 io/channel-tls.c   |  12 +++
 io/channel-watch.c

Re: [Qemu-devel] [PATCH] hw/i386: check if nvdimm is enabled before plugging

2017-01-13 Thread Stefan Hajnoczi

On Fri, Jan 13, 2017 at 07:56:51PM +0800, Haozhong Zhang wrote:
> The missing of 'nvdimm' in the machine type option '-M' means NVDIMM
> is disabled. QEMU should refuse to plug any NVDIMM device in this case
> and report the misconfiguration.
> 
> Reported-by: Stefan Hajnoczi 
> Signed-off-by: Haozhong Zhang 
> Message-Id: 20170112110928.GF4621@stefanha-x1.localdomain
> Message-Id: 20170111093630.2088-1-stefa...@redhat.com
> ---
>  hw/i386/pc.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 25e8586..3907609 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1715,6 +1715,11 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
>  }
>  
>  if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
> +if (!pcms->acpi_nvdimm_state.is_enabled) {
> +error_setg(&local_err,
> +   "nvdimm is not enabled: missing 'nvdimm' in '-M'");
> +goto out;
> +}

A warning is definitely useful to notify users of a possible
configuration error.

I wonder what happens when you plug an NVDIMM into a motherboard where
the firmware lacks support.  Does it:
 * Refuse to boot?
 * Treat the DIMM as regular RAM?
 * Boot but the DIMM will not be used by firmware and kernel?

QEMU should act the same way as real hardware.

Stefan


signature.asc
Description: PGP signature

[Qemu-devel] [PATCH 05/16] io: make qio_channel_yield aware of AioContexts

2017-01-13 Thread Paolo Bonzini

Support separate coroutines for reading and writing, and place the
read/write handlers on the AioContext that the QIOChannel is registered
with.

Signed-off-by: Paolo Bonzini 
---
 include/io/channel.h   | 37 ++
 io/channel.c   | 86 ++
 tests/Makefile.include |  2 +-
 3 files changed, 96 insertions(+), 29 deletions(-)

diff --git a/include/io/channel.h b/include/io/channel.h
index 665edd7..d7bad94 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -23,6 +23,7 @@
 
 #include "qemu-common.h"
 #include "qom/object.h"
+#include "qemu/coroutine.h"
 #include "block/aio.h"
 
 #define TYPE_QIO_CHANNEL "qio-channel"
@@ -59,8 +60,6 @@ typedef gboolean (*QIOChannelFunc)(QIOChannel *ioc,
GIOCondition condition,
gpointer data);
 
-typedef struct QIOChannelRestart QIOChannelRestart;
-
 /**
  * QIOChannel:
  *
@@ -84,8 +83,8 @@ struct QIOChannel {
 unsigned int features; /* bitmask of QIOChannelFeatures */
 char *name;
 AioContext *ctx;
-QIOChannelRestart *read_coroutine;
-QIOChannelRestart *write_coroutine;
+Coroutine *read_coroutine;
+Coroutine *write_coroutine;
 #ifdef _WIN32
 HANDLE event; /* For use with GSource on Win32 */
 #endif
@@ -508,13 +507,37 @@ guint qio_channel_add_watch(QIOChannel *ioc,
 
 
 /**
+ * qio_channel_set_aio_context:
+ * @ioc: the channel object
+ * @ctx: the #AioContext to set the handlers on
+ *
+ * Request that qio_channel_yield() sets I/O handlers on
+ * the given #AioContext.  If @ctx is %NULL, qio_channel_yield()
+ * uses QEMU's main thread event loop.
+ */
+void qio_channel_set_aio_context(QIOChannel *ioc,
+ AioContext *ctx);
+
+/**
+ * qio_channel_detach_aio_context:
+ * @ioc: the channel object
+ *
+ * Disable any I/O handlers set by qio_channel_yield().  With the
+ * help of aio_co_schedule(), this allows moving a coroutine that was
+ * paused by qio_channel_yield() to another context.
+ */
+void qio_channel_detach_aio_context(QIOChannel *ioc);
+
+/**
  * qio_channel_yield:
  * @ioc: the channel object
  * @condition: the I/O condition to wait for
  *
- * Yields execution from the current coroutine until
- * the condition indicated by @condition becomes
- * available.
+ * Yields execution from the current coroutine until the condition
+ * indicated by @condition becomes available.  @condition must
+ * be either %G_IO_IN or %G_IO_OUT; it cannot contain both.  In
+ * addition, no two coroutine can be waiting on the same condition
+ * and channel at the same time.
  *
  * This must only be called from coroutine context
  */
diff --git a/io/channel.c b/io/channel.c
index ce470d7..1e043bf 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -21,7 +21,7 @@
 #include "qemu/osdep.h"
 #include "io/channel.h"
 #include "qapi/error.h"
-#include "qemu/coroutine.h"
+#include "qemu/main-loop.h"
 
 bool qio_channel_has_feature(QIOChannel *ioc,
  QIOChannelFeature feature)
@@ -238,36 +238,80 @@ off_t qio_channel_io_seek(QIOChannel *ioc,
 }
 
 
-typedef struct QIOChannelYieldData QIOChannelYieldData;
-struct QIOChannelYieldData {
-QIOChannel *ioc;
-Coroutine *co;
-};
+static void qio_channel_set_aio_fd_handlers(QIOChannel *ioc);
+
+static void qio_channel_restart_read(void *opaque)
+{
+QIOChannel *ioc = opaque;
+Coroutine *co = ioc->read_coroutine;
 
+ioc->read_coroutine = NULL;
+qio_channel_set_aio_fd_handlers(ioc);
+aio_co_wake(co);
+}
 
-static gboolean qio_channel_yield_enter(QIOChannel *ioc,
-GIOCondition condition,
-gpointer opaque)
+static void qio_channel_restart_write(void *opaque)
 {
-QIOChannelYieldData *data = opaque;
-qemu_coroutine_enter(data->co);
-return FALSE;
+QIOChannel *ioc = opaque;
+Coroutine *co = ioc->write_coroutine;
+
+ioc->write_coroutine = NULL;
+qio_channel_set_aio_fd_handlers(ioc);
+aio_co_wake(co);
 }
 
+static void qio_channel_set_aio_fd_handlers(QIOChannel *ioc)
+{
+IOHandler *rd_handler = NULL, *wr_handler = NULL;
+AioContext *ctx;
+
+if (ioc->read_coroutine) {
+   rd_handler = qio_channel_restart_read;
+}
+if (ioc->write_coroutine) {
+   rd_handler = qio_channel_restart_write;
+}
+
+ctx = ioc->ctx ? ioc->ctx : iohandler_get_aio_context();
+qio_channel_set_aio_fd_handler(ioc, ctx, rd_handler, wr_handler, ioc);
+}
+
+void qio_channel_set_aio_context(QIOChannel *ioc,
+ AioContext *ctx)
+{
+AioContext *old_ctx;
+if (ioc->ctx == ctx) {
+return;
+}
+
+old_ctx = ioc->ctx ? ioc->ctx : iohandler_get_aio_context();
+qio_channel_set_aio_fd_handler(ioc, old_ctx, NULL, NULL, NULL);
+ioc->ctx = ctx;
+qio_channel_set_aio_fd_handlers(ioc);
+}
+
+void qio_channel_detach_aio_context(QIOChannel *ioc

[Qemu-devel] [PATCH 01/16] aio: introduce aio_co_schedule and aio_co_wake

2017-01-13 Thread Paolo Bonzini

aio_co_wake provides the infrastructure to start a coroutine on a "home"
AioContext.  It will be used by CoMutex and CoQueue, so that coroutines
don't jump from one context to another when they go to sleep on a
mutex or waitqueue.  However, it can also be used as a more efficient
alternative to one-shot bottom halves, and saves the effort of tracking
which AioContext a coroutine is running on.

aio_co_schedule is the part of aio_co_wake that starts a coroutine
on a remove AioContext, but it is also useful to implement e.g.
bdrv_set_aio_context callbacks.

The implementation of aio_co_schedule is based on a lock-free
multiple-producer, single-consumer queue.  The multiple producers use
cmpxchg to add to a LIFO stack.  The consumer (a per-AioContext bottom
half) grabs all items added so far, inverts the list to make it FIFO,
and goes through it one item at a time until it's empty.  The data
structure was inspired by OSv, which uses it in the very code we'll
"port" to QEMU for the thread-safe CoMutex.

Most of the new code is really tests.

Signed-off-by: Paolo Bonzini 
---
 async.c  |  65 +
 include/block/aio.h  |  32 +++
 include/qemu/coroutine_int.h |  10 +-
 tests/Makefile.include   |  13 ++-
 tests/iothread.c |  91 ++
 tests/iothread.h |  25 +
 tests/test-aio-multithread.c | 213 +++
 tests/test-vmstate.c |  11 ---
 trace-events |   4 +
 util/qemu-coroutine.c|   8 ++
 10 files changed, 456 insertions(+), 16 deletions(-)
 create mode 100644 tests/iothread.c
 create mode 100644 tests/iothread.h
 create mode 100644 tests/test-aio-multithread.c

diff --git a/async.c b/async.c
index 0d218ab..1338682 100644
--- a/async.c
+++ b/async.c
@@ -30,6 +30,8 @@
 #include "qemu/main-loop.h"
 #include "qemu/atomic.h"
 #include "block/raw-aio.h"
+#include "trace/generated-tracers.h"
+#include "qemu/coroutine_int.h"
 
 /***/
 /* bottom halves (can be seen as timers which expire ASAP) */
@@ -274,6 +276,9 @@ aio_ctx_finalize(GSource *source)
 }
 #endif
 
+assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
+qemu_bh_delete(ctx->co_schedule_bh);
+
 qemu_lockcnt_lock(&ctx->list_lock);
 assert(!qemu_lockcnt_count(&ctx->list_lock));
 while (ctx->first_bh) {
@@ -363,6 +368,28 @@ static bool event_notifier_poll(void *opaque)
 return atomic_read(&ctx->notified);
 }
 
+static void co_schedule_bh_cb(void *opaque)
+{
+AioContext *ctx = opaque;
+QSLIST_HEAD(, Coroutine) straight, reversed;
+
+QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
+QSLIST_INIT(&straight);
+
+while (!QSLIST_EMPTY(&reversed)) {
+Coroutine *co = QSLIST_FIRST(&reversed);
+QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
+QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
+}
+
+while (!QSLIST_EMPTY(&straight)) {
+Coroutine *co = QSLIST_FIRST(&straight);
+QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
+trace_aio_co_schedule_bh_cb(ctx, co);
+qemu_coroutine_enter(co);
+}
+}
+
 AioContext *aio_context_new(Error **errp)
 {
 int ret;
@@ -378,6 +405,10 @@ AioContext *aio_context_new(Error **errp)
 }
 g_source_set_can_recurse(&ctx->source, true);
 qemu_lockcnt_init(&ctx->list_lock);
+
+ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
+QSLIST_INIT(&ctx->scheduled_coroutines);
+
 aio_set_event_notifier(ctx, &ctx->notifier,
false,
(EventNotifierHandler *)
@@ -401,6 +432,40 @@ fail:
 return NULL;
 }
 
+void aio_co_schedule(AioContext *ctx, Coroutine *co)
+{
+trace_aio_co_schedule(ctx, co);
+QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
+  co, co_scheduled_next);
+qemu_bh_schedule(ctx->co_schedule_bh);
+}
+
+void aio_co_wake(struct Coroutine *co)
+{
+AioContext *ctx;
+
+/* Read coroutine before co->ctx.  Matches smp_wmb in
+ * qemu_coroutine_enter.
+ */
+smp_read_barrier_depends();
+ctx = atomic_read(&co->ctx);
+
+if (ctx != qemu_get_current_aio_context()) {
+aio_co_schedule(ctx, co);
+return;
+}
+
+if (qemu_in_coroutine()) {
+Coroutine *self = qemu_coroutine_self();
+assert(self != co);
+QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
+} else {
+aio_context_acquire(ctx);
+qemu_coroutine_enter(co);
+aio_context_release(ctx);
+}
+}
+
 void aio_context_ref(AioContext *ctx)
 {
 g_source_ref(&ctx->source);
diff --git a/include/block/aio.h b/include/block/aio.h
index 7df271d..614cbc6 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -47,6 +47,7 @@ typedef void QEMUBHFunc(void *opaque);
 typedef bool AioPollFn(void *opaque);
 typedef void IOHandler(vo

[Qemu-devel] [PATCH 07/16] coroutine-lock: reschedule coroutine on the AioContext it was running on

2017-01-13 Thread Paolo Bonzini

As a small step towards the introduction of multiqueue, we want
coroutines to remain on the same AioContext that started them,
unless they are moved explicitly with e.g. aio_co_schedule.  This patch
avoids that coroutines switch AioContext when they use a CoMutex.
For now it does not make much of a difference, because the CoMutex
is not thread-safe and the AioContext itself is used to protect the
CoMutex from concurrent access.  However, this is going to change.

Signed-off-by: Paolo Bonzini 
---
 util/qemu-coroutine-lock.c | 5 ++---
 util/trace-events  | 1 -
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index 14cf9ce..e6afd1a 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -27,6 +27,7 @@
 #include "qemu/coroutine.h"
 #include "qemu/coroutine_int.h"
 #include "qemu/queue.h"
+#include "block/aio.h"
 #include "trace.h"
 
 void qemu_co_queue_init(CoQueue *queue)
@@ -63,7 +64,6 @@ void qemu_co_queue_run_restart(Coroutine *co)
 
 static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
 {
-Coroutine *self = qemu_coroutine_self();
 Coroutine *next;
 
 if (QSIMPLEQ_EMPTY(&queue->entries)) {
@@ -72,8 +72,7 @@ static bool qemu_co_queue_do_restart(CoQueue *queue, bool 
single)
 
 while ((next = QSIMPLEQ_FIRST(&queue->entries)) != NULL) {
 QSIMPLEQ_REMOVE_HEAD(&queue->entries, co_queue_next);
-QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next);
-trace_qemu_co_queue_next(next);
+aio_co_wake(next);
 if (single) {
 break;
 }
diff --git a/util/trace-events b/util/trace-events
index 2b8aa30..65705c4 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -13,7 +13,6 @@ qemu_coroutine_terminate(void *co) "self %p"
 
 # util/qemu-coroutine-lock.c
 qemu_co_queue_run_restart(void *co) "co %p"
-qemu_co_queue_next(void *nxt) "next %p"
 qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
 qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"
 qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p"
-- 
2.9.3

[Qemu-devel] [PATCH 12/16] block: explicitly acquire aiocontext in bottom halves that need it

2017-01-13 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini 
---
 async.c   |  4 ++--
 block/archipelago.c   |  3 +++
 block/blkdebug.c  |  9 +
 block/blkreplay.c |  2 +-
 block/block-backend.c |  6 ++
 block/curl.c  | 26 ++
 block/gluster.c   |  9 +
 block/io.c|  6 +-
 block/iscsi.c |  6 +-
 block/linux-aio.c | 15 +--
 block/nfs.c   |  3 ++-
 block/null.c  |  4 
 block/qed.c   |  3 +++
 block/rbd.c   |  4 
 dma-helpers.c |  2 ++
 hw/block/virtio-blk.c |  2 ++
 hw/scsi/scsi-bus.c|  2 ++
 thread-pool.c |  2 ++
 18 files changed, 72 insertions(+), 36 deletions(-)

diff --git a/async.c b/async.c
index ccb70e7..0243ca9 100644
--- a/async.c
+++ b/async.c
@@ -113,9 +113,7 @@ int aio_bh_poll(AioContext *ctx)
 ret = 1;
 }
 bh->idle = 0;
-aio_context_acquire(ctx);
 aio_bh_call(bh);
-aio_context_release(ctx);
 }
 if (bh->deleted) {
 deleted = true;
@@ -388,7 +386,9 @@ static void co_schedule_bh_cb(void *opaque)
 Coroutine *co = QSLIST_FIRST(&straight);
 QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
 trace_aio_co_schedule_bh_cb(ctx, co);
+aio_context_acquire(ctx);
 qemu_coroutine_enter(co);
+aio_context_release(ctx);
 }
 }
 
diff --git a/block/archipelago.c b/block/archipelago.c
index 2449cfc..a624390 100644
--- a/block/archipelago.c
+++ b/block/archipelago.c
@@ -310,8 +310,11 @@ static void qemu_archipelago_complete_aio(void *opaque)
 {
 AIORequestData *reqdata = (AIORequestData *) opaque;
 ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb;
+AioContext *ctx = bdrv_get_aio_context(aio_cb->common.bs);
 
+aio_context_acquire(ctx);
 aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
+aio_context_release(ctx);
 aio_cb->status = 0;
 
 qemu_aio_unref(aio_cb);
diff --git a/block/blkdebug.c b/block/blkdebug.c
index acccf85..259369d 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -405,12 +405,6 @@ out:
 return ret;
 }
 
-static void error_callback_bh(void *opaque)
-{
-Coroutine *co = opaque;
-qemu_coroutine_enter(co);
-}
-
 static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
 {
 BDRVBlkdebugState *s = bs->opaque;
@@ -423,8 +417,7 @@ static int inject_error(BlockDriverState *bs, BlkdebugRule 
*rule)
 }
 
 if (!immediately) {
-aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
-qemu_coroutine_self());
+aio_co_schedule(bdrv_get_aio_context(bs), qemu_coroutine_self());
 qemu_coroutine_yield();
 }
 
diff --git a/block/blkreplay.c b/block/blkreplay.c
index a741654..cfc8c5b 100755
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -60,7 +60,7 @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
 static void blkreplay_bh_cb(void *opaque)
 {
 Request *req = opaque;
-qemu_coroutine_enter(req->co);
+aio_co_wake(req->co);
 qemu_bh_delete(req->bh);
 g_free(req);
 }
diff --git a/block/block-backend.c b/block/block-backend.c
index 1177598..bfc0e6b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -939,9 +939,12 @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags 
flags)
 static void error_callback_bh(void *opaque)
 {
 struct BlockBackendAIOCB *acb = opaque;
+AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
 
 bdrv_dec_in_flight(acb->common.bs);
+aio_context_acquire(ctx);
 acb->common.cb(acb->common.opaque, acb->ret);
+aio_context_release(ctx);
 qemu_aio_unref(acb);
 }
 
@@ -983,9 +986,12 @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
 static void blk_aio_complete_bh(void *opaque)
 {
 BlkAioEmAIOCB *acb = opaque;
+AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
 
 assert(acb->has_returned);
+aio_context_acquire(ctx);
 blk_aio_complete(acb);
+aio_context_release(ctx);
 }
 
 static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
diff --git a/block/curl.c b/block/curl.c
index 05b9ca3..f3f063b 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -796,13 +796,18 @@ static void curl_readv_bh_cb(void *p)
 {
 CURLState *state;
 int running;
+int ret = -EINPROGRESS;
 
 CURLAIOCB *acb = p;
-BDRVCURLState *s = acb->common.bs->opaque;
+BlockDriverState *bs = acb->common.bs;
+BDRVCURLState *s = bs->opaque;
+AioContext *ctx = bdrv_get_aio_context(bs);
 
 size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
 size_t end;
 
+aio_context_acquire(ctx);
+
 // In case we have the requested data already (e.g. read-ahead),
 // we can just call the callback and be done.
 switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
@@ -810,7 +815,7 @@ static void curl_readv_bh_cb(void *p)

[Qemu-devel] [PATCH 06/16] nbd: do not block on partial reply header reads

2017-01-13 Thread Paolo Bonzini

Read the replies from a coroutine, switching the read side between the
"read header" coroutine and the I/O coroutine that reads the body of
the reply.

qio_channel_yield is used so that the right coroutine is restarted
automatically, eliminating the need for send_coroutine in
NBDClientSession.

Signed-off-by: Paolo Bonzini 
---
 block/nbd-client.c | 108 +
 block/nbd-client.h |   2 +-
 nbd/client.c   |   2 +-
 nbd/common.c   |   9 +
 4 files changed, 45 insertions(+), 76 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 06f1532..eacc7a5 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -33,8 +33,9 @@
 #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
 #define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))
 
-static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
+static void nbd_recv_coroutines_enter_all(BlockDriverState *bs)
 {
+NBDClientSession *s = nbd_get_client_session(bs);
 int i;
 
 for (i = 0; i < MAX_NBD_REQUESTS; i++) {
@@ -42,6 +43,7 @@ static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
 qemu_coroutine_enter(s->recv_coroutine[i]);
 }
 }
+BDRV_POLL_WHILE(bs, s->read_reply_co);
 }
 
 static void nbd_teardown_connection(BlockDriverState *bs)
@@ -56,7 +58,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
 qio_channel_shutdown(client->ioc,
  QIO_CHANNEL_SHUTDOWN_BOTH,
  NULL);
-nbd_recv_coroutines_enter_all(client);
+nbd_recv_coroutines_enter_all(bs);
 
 nbd_client_detach_aio_context(bs);
 object_unref(OBJECT(client->sioc));
@@ -65,54 +67,34 @@ static void nbd_teardown_connection(BlockDriverState *bs)
 client->ioc = NULL;
 }
 
-static void nbd_reply_ready(void *opaque)
+static void nbd_read_reply_entry(void *opaque)
 {
-BlockDriverState *bs = opaque;
-NBDClientSession *s = nbd_get_client_session(bs);
+NBDClientSession *s = opaque;
 uint64_t i;
 int ret;
 
-if (!s->ioc) { /* Already closed */
-return;
-}
-
-if (s->reply.handle == 0) {
-/* No reply already in flight.  Fetch a header.  It is possible
- * that another thread has done the same thing in parallel, so
- * the socket is not readable anymore.
- */
+for (;;) {
+assert(s->reply.handle == 0);
 ret = nbd_receive_reply(s->ioc, &s->reply);
-if (ret == -EAGAIN) {
-return;
-}
 if (ret < 0) {
-s->reply.handle = 0;
-goto fail;
+break;
 }
-}
-
-/* There's no need for a mutex on the receive side, because the
- * handler acts as a synchronization point and ensures that only
- * one coroutine is called until the reply finishes.  */
-i = HANDLE_TO_INDEX(s, s->reply.handle);
-if (i >= MAX_NBD_REQUESTS) {
-goto fail;
-}
 
-if (s->recv_coroutine[i]) {
-qemu_coroutine_enter(s->recv_coroutine[i]);
-return;
-}
-
-fail:
-nbd_teardown_connection(bs);
-}
+/* There's no need for a mutex on the receive side, because the
+ * handler acts as a synchronization point and ensures that only
+ * one coroutine is called until the reply finishes.
+ */
+i = HANDLE_TO_INDEX(s, s->reply.handle);
+if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
+break;
+}
 
-static void nbd_restart_write(void *opaque)
-{
-BlockDriverState *bs = opaque;
+aio_co_wake(s->recv_coroutine[i]);
 
-qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
+/* We're woken up by the recv_coroutine itself.  */
+qemu_coroutine_yield();
+}
+s->read_reply_co = NULL;
 }
 
 static int nbd_co_send_request(BlockDriverState *bs,
@@ -120,7 +102,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
QEMUIOVector *qiov)
 {
 NBDClientSession *s = nbd_get_client_session(bs);
-AioContext *aio_context;
 int rc, ret, i;
 
 qemu_co_mutex_lock(&s->send_mutex);
@@ -141,11 +122,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
 return -EPIPE;
 }
 
-s->send_coroutine = qemu_coroutine_self();
-aio_context = bdrv_get_aio_context(bs);
-
-aio_set_fd_handler(aio_context, s->sioc->fd, false,
-   nbd_reply_ready, nbd_restart_write, NULL, bs);
 if (qiov) {
 qio_channel_set_cork(s->ioc, true);
 rc = nbd_send_request(s->ioc, request);
@@ -160,9 +136,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
 } else {
 rc = nbd_send_request(s->ioc, request);
 }
-aio_set_fd_handler(aio_context, s->sioc->fd, false,
-   nbd_reply_ready, NULL, NULL, bs);
-s->send_coroutine = NULL;
 qemu_co_mutex_unlock(&s->send_mutex);
 return rc;
 }
@@ -174,8

[Qemu-devel] [PATCH 11/16] block: explicitly acquire aiocontext in callbacks that need it

2017-01-13 Thread Paolo Bonzini

This covers both file descriptor callbacks and polling callbacks,
since they execute related code.

Signed-off-by: Paolo Bonzini 
---
 aio-posix.c   |  7 ---
 aio-win32.c   |  6 --
 block/curl.c  | 16 +---
 block/iscsi.c |  4 
 block/linux-aio.c |  4 
 block/nfs.c   |  6 ++
 block/sheepdog.c  | 29 +++--
 block/ssh.c   | 29 +
 block/win32-aio.c | 10 ++
 hw/block/virtio-blk.c |  5 -
 hw/scsi/virtio-scsi.c |  6 ++
 nbd/server.c  |  4 
 12 files changed, 71 insertions(+), 55 deletions(-)

diff --git a/aio-posix.c b/aio-posix.c
index 8d79cf3..6beebcd 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -402,9 +402,7 @@ static bool aio_dispatch_handlers(AioContext *ctx)
 (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
 aio_node_check(ctx, node->is_external) &&
 node->io_read) {
-aio_context_acquire(ctx);
 node->io_read(node->opaque);
-aio_context_release(ctx);
 
 /* aio_notify() does not count as progress */
 if (node->opaque != &ctx->notifier) {
@@ -415,9 +413,7 @@ static bool aio_dispatch_handlers(AioContext *ctx)
 (revents & (G_IO_OUT | G_IO_ERR)) &&
 aio_node_check(ctx, node->is_external) &&
 node->io_write) {
-aio_context_acquire(ctx);
 node->io_write(node->opaque);
-aio_context_release(ctx);
 progress = true;
 }
 
@@ -617,10 +613,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
 start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
 }
 
-aio_context_acquire(ctx);
 progress = try_poll_mode(ctx, blocking);
-aio_context_release(ctx);
-
 if (!progress) {
 assert(npfd == 0);
 
diff --git a/aio-win32.c b/aio-win32.c
index 810e1c6..20b63ce 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -266,9 +266,7 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE 
event)
 (revents || event_notifier_get_handle(node->e) == event) &&
 node->io_notify) {
 node->pfd.revents = 0;
-aio_context_acquire(ctx);
 node->io_notify(node->e);
-aio_context_release(ctx);
 
 /* aio_notify() does not count as progress */
 if (node->e != &ctx->notifier) {
@@ -280,15 +278,11 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE 
event)
 (node->io_read || node->io_write)) {
 node->pfd.revents = 0;
 if ((revents & G_IO_IN) && node->io_read) {
-aio_context_acquire(ctx);
 node->io_read(node->opaque);
-aio_context_release(ctx);
 progress = true;
 }
 if ((revents & G_IO_OUT) && node->io_write) {
-aio_context_acquire(ctx);
 node->io_write(node->opaque);
-aio_context_release(ctx);
 progress = true;
 }
 
diff --git a/block/curl.c b/block/curl.c
index 65e6da1..05b9ca3 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -386,9 +386,8 @@ static void curl_multi_check_completion(BDRVCURLState *s)
 }
 }
 
-static void curl_multi_do(void *arg)
+static void curl_multi_do_locked(CURLState *s)
 {
-CURLState *s = (CURLState *)arg;
 CURLSocket *socket, *next_socket;
 int running;
 int r;
@@ -406,12 +405,23 @@ static void curl_multi_do(void *arg)
 }
 }
 
+static void curl_multi_do(void *arg)
+{
+CURLState *s = (CURLState *)arg;
+
+aio_context_acquire(s->s->aio_context);
+curl_multi_do_locked(s);
+aio_context_release(s->s->aio_context);
+}
+
 static void curl_multi_read(void *arg)
 {
 CURLState *s = (CURLState *)arg;
 
-curl_multi_do(arg);
+aio_context_acquire(s->s->aio_context);
+curl_multi_do_locked(s);
 curl_multi_check_completion(s->s);
+aio_context_release(s->s->aio_context);
 }
 
 static void curl_multi_timeout_do(void *arg)
diff --git a/block/iscsi.c b/block/iscsi.c
index e1f10d6..54d1381 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -394,8 +394,10 @@ iscsi_process_read(void *arg)
 IscsiLun *iscsilun = arg;
 struct iscsi_context *iscsi = iscsilun->iscsi;
 
+aio_context_acquire(iscsilun->aio_context);
 iscsi_service(iscsi, POLLIN);
 iscsi_set_events(iscsilun);
+aio_context_release(iscsilun->aio_context);
 }
 
 static void
@@ -404,8 +406,10 @@ iscsi_process_write(void *arg)
 IscsiLun *iscsilun = arg;
 struct iscsi_context *iscsi = iscsilun->iscsi;
 
+aio_context_acquire(iscsilun->aio_context);
 iscsi_service(iscsi, POLLOUT);
 iscsi_set_events(iscsilun);
+aio_context_release(iscsilun->aio_context);
 }
 
 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 03ab741..277c016 100644
--- a/block/linux-aio.c
+

[Qemu-devel] [PATCH 08/16] qed: introduce qed_aio_start_io and qed_aio_next_io_cb

2017-01-13 Thread Paolo Bonzini

qed_aio_start_io and qed_aio_next_io will not have to acquire/release
the AioContext, while qed_aio_next_io_cb will.  Split the functionality
and gain a little type-safety in the process.

Signed-off-by: Paolo Bonzini 
---
 block/qed.c | 39 +--
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index 1a7ef0a..7f1c508 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -273,7 +273,19 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
 return l2_table;
 }
 
-static void qed_aio_next_io(void *opaque, int ret);
+static void qed_aio_next_io(QEDAIOCB *acb, int ret);
+
+static void qed_aio_start_io(QEDAIOCB *acb)
+{
+qed_aio_next_io(acb, 0);
+}
+
+static void qed_aio_next_io_cb(void *opaque, int ret)
+{
+QEDAIOCB *acb = opaque;
+
+qed_aio_next_io(acb, ret);
+}
 
 static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
 {
@@ -292,7 +304,7 @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState 
*s)
 
 acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
 if (acb) {
-qed_aio_next_io(acb, 0);
+qed_aio_start_io(acb);
 }
 }
 
@@ -959,7 +971,7 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
 QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
 acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
 if (acb) {
-qed_aio_next_io(acb, 0);
+qed_aio_start_io(acb);
 } else if (s->header.features & QED_F_NEED_CHECK) {
 qed_start_need_check_timer(s);
 }
@@ -984,7 +996,7 @@ static void qed_commit_l2_update(void *opaque, int ret)
 acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
 assert(acb->request.l2_table != NULL);
 
-qed_aio_next_io(opaque, ret);
+qed_aio_next_io(acb, ret);
 }
 
 /**
@@ -1032,11 +1044,11 @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int 
ret, uint64_t offset)
 if (need_alloc) {
 /* Write out the whole new L2 table */
 qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
-qed_aio_write_l1_update, acb);
+   qed_aio_write_l1_update, acb);
 } else {
 /* Write out only the updated part of the L2 table */
 qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
-qed_aio_next_io, acb);
+   qed_aio_next_io_cb, acb);
 }
 return;
 
@@ -1088,7 +1100,7 @@ static void qed_aio_write_main(void *opaque, int ret)
 }
 
 if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
-next_fn = qed_aio_next_io;
+next_fn = qed_aio_next_io_cb;
 } else {
 if (s->bs->backing) {
 next_fn = qed_aio_write_flush_before_l2_update;
@@ -1201,7 +1213,7 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 if (acb->flags & QED_AIOCB_ZERO) {
 /* Skip ahead if the clusters are already zero */
 if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
-qed_aio_next_io(acb, 0);
+qed_aio_start_io(acb);
 return;
 }
 
@@ -1321,18 +1333,18 @@ static void qed_aio_read_data(void *opaque, int ret,
 /* Handle zero cluster and backing file reads */
 if (ret == QED_CLUSTER_ZERO) {
 qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
-qed_aio_next_io(acb, 0);
+qed_aio_start_io(acb);
 return;
 } else if (ret != QED_CLUSTER_FOUND) {
 qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
-  &acb->backing_qiov, qed_aio_next_io, acb);
+  &acb->backing_qiov, qed_aio_next_io_cb, acb);
 return;
 }
 
 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
 bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
-   qed_aio_next_io, acb);
+   qed_aio_next_io_cb, acb);
 return;
 
 err:
@@ -1342,9 +1354,8 @@ err:
 /**
  * Begin next I/O or complete the request
  */
-static void qed_aio_next_io(void *opaque, int ret)
+static void qed_aio_next_io(QEDAIOCB *acb, int ret)
 {
-QEDAIOCB *acb = opaque;
 BDRVQEDState *s = acb_to_s(acb);
 QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
 qed_aio_write_data : qed_aio_read_data;
@@ -1400,7 +1411,7 @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
 qemu_iovec_init(&acb->cur_qiov, qiov->niov);
 
 /* Start request */
-qed_aio_next_io(acb, 0);
+qed_aio_start_io(acb);
 return &acb->common;
 }
 
-- 
2.9.3

1 2 3 >

1 - 100 of 205 matches

Mail list logo