[Qemu-devel] [PATCH v1 2/3] target-ppc: implement vnegw/d instructions

2016-10-11 Thread Nikunj A Dadhania
Vector Integer Negate Instructions:

vnegw: Vector Negate Word
vnegd: Vector Negate Doubleword

Signed-off-by: Nikunj A Dadhania 
---
 target-ppc/helper.h |  2 ++
 target-ppc/int_helper.c | 12 
 target-ppc/translate/vmx-impl.inc.c |  2 ++
 target-ppc/translate/vmx-ops.inc.c  |  2 ++
 4 files changed, 18 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 04c6421..5fcc546 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -272,6 +272,8 @@ DEF_HELPER_2(vextsh2w, void, avr, avr)
 DEF_HELPER_2(vextsb2d, void, avr, avr)
 DEF_HELPER_2(vextsh2d, void, avr, avr)
 DEF_HELPER_2(vextsw2d, void, avr, avr)
+DEF_HELPER_2(vnegw, void, avr, avr)
+DEF_HELPER_2(vnegd, void, avr, avr)
 DEF_HELPER_2(vupkhpx, void, avr, avr)
 DEF_HELPER_2(vupklpx, void, avr, avr)
 DEF_HELPER_2(vupkhsb, void, avr, avr)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 5aee0a8..7446e4e 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -1949,6 +1949,18 @@ VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
 VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
 #undef VEXT_SIGNED
 
+#define VNEG(name, element, mask)   \
+void helper_##name(ppc_avr_t *r, ppc_avr_t *b)  \
+{   \
+int i;  \
+VECTOR_FOR_INORDER_I(i, element) {  \
+r->element[i] = -b->element[i]; \
+}   \
+}
+VNEG(vnegw, s32, UINT32_MAX)
+VNEG(vnegd, s64, UINT64_MAX)
+#undef VNEG
+
 #define VSPLTI(suffix, element, splat_type) \
 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
 {   \
diff --git a/target-ppc/translate/vmx-impl.inc.c 
b/target-ppc/translate/vmx-impl.inc.c
index c8998f3..563f101 100644
--- a/target-ppc/translate/vmx-impl.inc.c
+++ b/target-ppc/translate/vmx-impl.inc.c
@@ -815,6 +815,8 @@ GEN_VXFORM_NOA(vclzb, 1, 28)
 GEN_VXFORM_NOA(vclzh, 1, 29)
 GEN_VXFORM_NOA(vclzw, 1, 30)
 GEN_VXFORM_NOA(vclzd, 1, 31)
+GEN_VXFORM_NOA_2(vnegw, 1, 24, 6)
+GEN_VXFORM_NOA_2(vnegd, 1, 24, 7)
 GEN_VXFORM_NOA_2(vextsb2w, 1, 24, 16)
 GEN_VXFORM_NOA_2(vextsh2w, 1, 24, 17)
 GEN_VXFORM_NOA_2(vextsb2d, 1, 24, 24)
diff --git a/target-ppc/translate/vmx-ops.inc.c 
b/target-ppc/translate/vmx-ops.inc.c
index 68cba3e..ab64ab2 100644
--- a/target-ppc/translate/vmx-ops.inc.c
+++ b/target-ppc/translate/vmx-ops.inc.c
@@ -215,6 +215,8 @@ GEN_VXFORM_DUAL_INV(vspltish, vinserth, 6, 13, 0x, 
0x10,
 GEN_VXFORM_DUAL_INV(vspltisw, vinsertw, 6, 14, 0x, 0x10,
PPC_ALTIVEC),
 GEN_VXFORM_300_EXT(vinsertd, 6, 15, 0x10),
+GEN_VXFORM_300_EO(vnegw, 0x01, 0x18, 0x06),
+GEN_VXFORM_300_EO(vnegd, 0x01, 0x18, 0x07),
 GEN_VXFORM_300_EO(vextsb2w, 0x01, 0x18, 0x10),
 GEN_VXFORM_300_EO(vextsh2w, 0x01, 0x18, 0x11),
 GEN_VXFORM_300_EO(vextsb2d, 0x01, 0x18, 0x18),
-- 
2.7.4




[Qemu-devel] [PATCH v1 3/3] target-ppc: implement xxbr[qdwh] instruction

2016-10-11 Thread Nikunj A Dadhania
Add required helpers (GEN_XX2FORM_EO) for supporting these instructions.

xxbrh: VSX Vector Byte-Reverse Halfword
xxbrw: VSX Vector Byte-Reverse Word
xxbrd: VSX Vector Byte-Reverse Doubleword
xxbrq: VSX Vector Byte-Reverse Quadword

Signed-off-by: Nikunj A Dadhania 
---
 target-ppc/translate.c  | 32 +++
 target-ppc/translate/vsx-impl.inc.c | 77 +
 target-ppc/translate/vsx-ops.inc.c  |  8 
 3 files changed, 117 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index dab8f19..94989b2 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -376,6 +376,9 @@ GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type, 
type2)
 #define GEN_HANDLER_E_2(name, opc1, opc2, opc3, opc4, inval, type, type2) \
 GEN_OPCODE3(name, opc1, opc2, opc3, opc4, inval, type, type2)
 
+#define GEN_HANDLER2_E_2(name, onam, opc1, opc2, opc3, opc4, inval, typ, typ2) 
\
+GEN_OPCODE4(name, onam, opc1, opc2, opc3, opc4, inval, typ, typ2)
+
 typedef struct opcode_t {
 unsigned char opc1, opc2, opc3, opc4;
 #if HOST_LONG_BITS == 64 /* Explicitly align to 64 bits */
@@ -662,6 +665,21 @@ EXTRACT_HELPER(IMM8, 11, 8);
 },\
 .oname = stringify(name), \
 }
+#define GEN_OPCODE4(name, onam, op1, op2, op3, op4, invl, _typ, _typ2)\
+{ \
+.opc1 = op1,  \
+.opc2 = op2,  \
+.opc3 = op3,  \
+.opc4 = op4,  \
+.handler = {  \
+.inval1  = invl,  \
+.type = _typ, \
+.type2 = _typ2,   \
+.handler = _##name,   \
+.oname = onam,\
+},\
+.oname = onam,\
+}
 #else
 #define GEN_OPCODE(name, op1, op2, op3, invl, _typ, _typ2)\
 { \
@@ -720,6 +738,20 @@ EXTRACT_HELPER(IMM8, 11, 8);
 },\
 .oname = stringify(name), \
 }
+#define GEN_OPCODE4(name, onam, op1, op2, op3, op4, invl, _typ, _typ2)\
+{ \
+.opc1 = op1,  \
+.opc2 = op2,  \
+.opc3 = op3,  \
+.opc4 = op4,  \
+.handler = {  \
+.inval1  = invl,  \
+.type = _typ, \
+.type2 = _typ2,   \
+.handler = _##name,   \
+},\
+.oname = onam,\
+}
 #endif
 
 /* SPR load/store helpers */
diff --git a/target-ppc/translate/vsx-impl.inc.c 
b/target-ppc/translate/vsx-impl.inc.c
index 23ec1e1..52af5c1 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -132,6 +132,22 @@ static void gen_bswap16x8(TCGv_i64 outh, TCGv_i64 outl,
 tcg_temp_free_i64(mask);
 }
 
+static void gen_bswap32x4(TCGv_i64 outh, TCGv_i64 outl,
+  TCGv_i64 inh, TCGv_i64 inl)
+{
+TCGv_i64 hi = tcg_temp_new_i64();
+TCGv_i64 lo = tcg_temp_new_i64();
+
+tcg_gen_bswap64_i64(hi, inh);
+tcg_gen_bswap64_i64(lo, inl);
+tcg_gen_shri_i64(outh, hi, 32);
+tcg_gen_deposit_i64(outh, outh, hi, 32, 32);
+tcg_gen_shri_i64(outl, lo, 32);
+tcg_gen_deposit_i64(outl, outl, lo, 32, 32);
+
+tcg_temp_free_i64(hi);
+tcg_temp_free_i64(lo);
+}
 static void gen_lxvh8x(DisasContext *ctx)
 {
 TCGv EA;
@@ -717,6 +733,67 @@ GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, 

[Qemu-devel] [PATCH v1 1/3] target-ppc: implement vexts[bh]2w and vexts[bhw]2d

2016-10-11 Thread Nikunj A Dadhania
Vector Extend Sign Instructions:

vextsb2w: Vector Extend Sign Byte To Word
vextsh2w: Vector Extend Sign Halfword To Word
vextsb2d: Vector Extend Sign Byte To Doubleword
vextsh2d: Vector Extend Sign Halfword To Doubleword
vextsw2d: Vector Extend Sign Word To Doubleword

Signed-off-by: Nikunj A Dadhania 
---
 target-ppc/helper.h |  5 +
 target-ppc/int_helper.c | 15 +++
 target-ppc/translate/vmx-impl.inc.c |  5 +
 target-ppc/translate/vmx-ops.inc.c  |  5 +
 4 files changed, 30 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 796ad45..04c6421 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -267,6 +267,11 @@ DEF_HELPER_3(vinsertb, void, avr, avr, i32)
 DEF_HELPER_3(vinserth, void, avr, avr, i32)
 DEF_HELPER_3(vinsertw, void, avr, avr, i32)
 DEF_HELPER_3(vinsertd, void, avr, avr, i32)
+DEF_HELPER_2(vextsb2w, void, avr, avr)
+DEF_HELPER_2(vextsh2w, void, avr, avr)
+DEF_HELPER_2(vextsb2d, void, avr, avr)
+DEF_HELPER_2(vextsh2d, void, avr, avr)
+DEF_HELPER_2(vextsw2d, void, avr, avr)
 DEF_HELPER_2(vupkhpx, void, avr, avr)
 DEF_HELPER_2(vupklpx, void, avr, avr)
 DEF_HELPER_2(vupkhsb, void, avr, avr)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 202854f..5aee0a8 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -1934,6 +1934,21 @@ VEXTRACT(uw, u32)
 VEXTRACT(d, u64)
 #undef VEXTRACT
 
+#define VEXT_SIGNED(name, element, mask, cast, recast)  \
+void helper_##name(ppc_avr_t *r, ppc_avr_t *b)  \
+{   \
+int i;  \
+VECTOR_FOR_INORDER_I(i, element) {  \
+r->element[i] = (recast)((cast)(b->element[i] & mask)); \
+}   \
+}
+VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
+VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
+VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
+VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
+VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
+#undef VEXT_SIGNED
+
 #define VSPLTI(suffix, element, splat_type) \
 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
 {   \
diff --git a/target-ppc/translate/vmx-impl.inc.c 
b/target-ppc/translate/vmx-impl.inc.c
index 25cd073..c8998f3 100644
--- a/target-ppc/translate/vmx-impl.inc.c
+++ b/target-ppc/translate/vmx-impl.inc.c
@@ -815,6 +815,11 @@ GEN_VXFORM_NOA(vclzb, 1, 28)
 GEN_VXFORM_NOA(vclzh, 1, 29)
 GEN_VXFORM_NOA(vclzw, 1, 30)
 GEN_VXFORM_NOA(vclzd, 1, 31)
+GEN_VXFORM_NOA_2(vextsb2w, 1, 24, 16)
+GEN_VXFORM_NOA_2(vextsh2w, 1, 24, 17)
+GEN_VXFORM_NOA_2(vextsb2d, 1, 24, 24)
+GEN_VXFORM_NOA_2(vextsh2d, 1, 24, 25)
+GEN_VXFORM_NOA_2(vextsw2d, 1, 24, 26)
 GEN_VXFORM_NOA_2(vctzb, 1, 24, 28)
 GEN_VXFORM_NOA_2(vctzh, 1, 24, 29)
 GEN_VXFORM_NOA_2(vctzw, 1, 24, 30)
diff --git a/target-ppc/translate/vmx-ops.inc.c 
b/target-ppc/translate/vmx-ops.inc.c
index ac1dc9b..68cba3e 100644
--- a/target-ppc/translate/vmx-ops.inc.c
+++ b/target-ppc/translate/vmx-ops.inc.c
@@ -215,6 +215,11 @@ GEN_VXFORM_DUAL_INV(vspltish, vinserth, 6, 13, 0x, 
0x10,
 GEN_VXFORM_DUAL_INV(vspltisw, vinsertw, 6, 14, 0x, 0x10,
PPC_ALTIVEC),
 GEN_VXFORM_300_EXT(vinsertd, 6, 15, 0x10),
+GEN_VXFORM_300_EO(vextsb2w, 0x01, 0x18, 0x10),
+GEN_VXFORM_300_EO(vextsh2w, 0x01, 0x18, 0x11),
+GEN_VXFORM_300_EO(vextsb2d, 0x01, 0x18, 0x18),
+GEN_VXFORM_300_EO(vextsh2d, 0x01, 0x18, 0x19),
+GEN_VXFORM_300_EO(vextsw2d, 0x01, 0x18, 0x1A),
 GEN_VXFORM_300_EO(vctzb, 0x01, 0x18, 0x1C),
 GEN_VXFORM_300_EO(vctzh, 0x01, 0x18, 0x1D),
 GEN_VXFORM_300_EO(vctzw, 0x01, 0x18, 0x1E),
-- 
2.7.4




[Qemu-devel] [PATCH v1 0/3] POWER9 TCG enablements - part6

2016-10-11 Thread Nikunj A Dadhania
This series contains 11 new instructions for POWER9 ISA3.0
   Vector Extend Sign
   Vector Integer Negate 
   Vector Byte-Reverse

Patches:
01:
vextsb2w: Vector Extend Sign Byte To Word
vextsh2w: Vector Extend Sign Halfword To Word
vextsb2d: Vector Extend Sign Byte To Doubleword
vextsh2d: Vector Extend Sign Halfword To Doubleword
vextsw2d: Vector Extend Sign Word To Doubleword
02:
vnegw: Vector Negate Word
vnegd: Vector Negate Doubleword
03:
xxbrh: VSX Vector Byte-Reverse Halfword
xxbrw: VSX Vector Byte-Reverse Word
xxbrd: VSX Vector Byte-Reverse Doubleword
xxbrq: VSX Vector Byte-Reverse Quadword

Changelog:
* Added temporary in xxbrq
* Use negate directly in place for computing 2's complement
* Use int8_t instead for char
* Dropped "VSX Scalar Compare" as fpu_helper needs change 
  with regard to exception flag handling

Nikunj A Dadhania (3):
  target-ppc: implement vexts[bh]2w and vexts[bhw]2d
  target-ppc: implement vnegw/d instructions
  target-ppc: implement xxbr[qdwh] instruction

 target-ppc/helper.h |  7 
 target-ppc/int_helper.c | 27 +
 target-ppc/translate.c  | 32 +++
 target-ppc/translate/vmx-impl.inc.c |  7 
 target-ppc/translate/vmx-ops.inc.c  |  7 
 target-ppc/translate/vsx-impl.inc.c | 77 +
 target-ppc/translate/vsx-ops.inc.c  |  8 
 7 files changed, 165 insertions(+)

-- 
2.7.4




Re: [Qemu-devel] [PATCH 0/4] POWER9 TCG enablements - part6

2016-10-11 Thread Nikunj A Dadhania
no-re...@patchew.org writes:
> Checking PATCH 3/4: target-ppc: implement xxbr[qdwh] instruction...
> ERROR: Macros with complex values should be enclosed in parenthesis
> #173: FILE: target-ppc/translate/vsx-ops.inc.c:42:
> +#define GEN_XX2FORM_EO(name, opc2, opc3, opc4, fl2)  
> \
> +GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 0, opc3, opc4, 0, PPC_NONE, fl2), 
> \
> +GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 1, opc3, opc4, 0, PPC_NONE, fl2)
>

Check patch doesn't like

#define FOO(x) \
BAR(x), \
BAR(x+1)

If I get rid of the "," the error goes away, but removing it would not
be correct for this use case. It is a false positive.

> total: 1 errors, 0 warnings, 156 lines checked
>
> Your patch has style problems, please review.  If any of these errors
> are false positives report them to the maintainer, see
> CHECKPATCH in MAINTAINERS.
>
> Checking PATCH 4/4: target-ppc: Add xscmp[eq, gt, ge, ne]dp instructions...
> === OUTPUT END ===
>
> Test command exited with code: 1
>

Regards
Nikunj




[Qemu-devel] Potential Bug in vIOMMU which may result in memory wasting

2016-10-11 Thread Liu, Yi L
Hi,

Resending it here since there was a code style issue with the debug patch
in the previous email.

1. Problem description:
Recently, I found a strange thing with vIOMMU in QEMU. It looks like
g_hash_table_lookup() does not always give the same result when the key
is the same, and this results in redundant memory allocation.
I'm not sure if it is expected. Has anyone else encountered it?

This potential issue is in vtd_find_add_as(). It uses the input PCIBus
pointer to look up the hash table and get a vtd_bus back. If there is no
hit, it allocates one and inserts it into the hash table. In my
understanding, a pci bus only requires a single vtd_bus structure, but
what I observed contradicts that.

2. Command to start guest:
x86_64-softmmu/qemu-system-x86_64 -boot c
-hda /home/sky/vms/vm-for-svm/svm-disk.img
-m 5120 -enable-kvm -machine type=q35 -device intel-iommu
-net nic -net tap,ifname=tap0, script=no,downscript=no -device
vfio-pci,host=00:02.0,id=hostdev0,addr=0x6

3. Log:
a) First enter of vtd_find_add_as(), no corresponding vtd_bus in
s->vtd_as_by_busptr, so allocate one, this is quite reasonable.
--
YiLiu - vtd_find_add_as()
 bus: 0x5606747f99c0
 s: 0x560675a2e000
 s->vtd_as_by_busptr: 0x560675c521e0, devfn: 0x0
 lookup result: no vtd_bus, allocate one
 vtd_bus: 0x56067457a620
  vtd_bus in s->vtd_as_by_busptr: 0x56067457a620
lookup hash_table again, vtd_bus: 0x56067457a620

b) Second enter of vtd_find_add_as(), again no corresponding
vtd_bus in s->vtd_as_by_busptr, so allocate one, this is strange
since devfn: 0x30 is actually also under pci bus 0.
--
YiLiu - vtd_find_add_as()
 bus: 0x5606747f99c0
 s: 0x560675a2e000
 s->vtd_as_by_busptr: 0x560675c521e0, devfn: 0x30
 lookup result: no vtd_bus, allocate one
 vtd_bus: 0x56067598fef0
  vtd_bus in s->vtd_as_by_busptr: 0x56067457a620
  vtd_bus in s->vtd_as_by_busptr: 0x56067598fef0
lookup hash_table again, vtd_bus: 0x56067598fef0

c) Third enter of vtd_find_add_as(), no corresponding vtd_bus
in s->vtd_as_by_busptr, so allocate one, this is also strange since
there should already be a vtd_bus in the hash table for pci bus 0.
--
YiLiu - vtd_find_add_as()
 bus: 0x5606747f99c0
 s: 0x560675a2e000
 s->vtd_as_by_busptr: 0x560675c521e0, devfn: 0x0
 lookup result: no vtd_bus, allocate one
 vtd_bus: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x56067457a620
  vtd_bus in s->vtd_as_by_busptr: 0x56067598fef0
lookup hash_table again, vtd_bus: 0x5606746aa400

d) The rest seems to be correct; however, there are 3 vtd_bus
structures in total for pci bus 0. This is a waste of memory in my
understanding.
--
YiLiu - vtd_find_add_as()
 bus: 0x5606747f99c0
 s: 0x560675a2e000
 s->vtd_as_by_busptr: 0x560675c521e0, devfn: 0x8
 lookup result: got vtd_bus
 vtd_bus: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x56067457a620
  vtd_bus in s->vtd_as_by_busptr: 0x56067598fef0
lookup hash_table again, vtd_bus: 0x5606746aa400
--
YiLiu - vtd_find_add_as()
 bus: 0x5606747f99c0
 s: 0x560675a2e000
 s->vtd_as_by_busptr: 0x560675c521e0, devfn: 0x10
 lookup result: got vtd_bus
 vtd_bus: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x56067457a620
  vtd_bus in s->vtd_as_by_busptr: 0x56067598fef0
lookup hash_table again, vtd_bus: 0x5606746aa400
--
YiLiu - vtd_find_add_as()
 bus: 0x5606747f99c0
 s: 0x560675a2e000
 s->vtd_as_by_busptr: 0x560675c521e0, devfn: 0x30
 lookup result: got vtd_bus
 vtd_bus: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x56067457a620
  vtd_bus in s->vtd_as_by_busptr: 0x56067598fef0
lookup hash_table again, vtd_bus: 0x5606746aa400
--
YiLiu - vtd_find_add_as()
 bus: 0x5606747f99c0
 s: 0x560675a2e000
 s->vtd_as_by_busptr: 0x560675c521e0, devfn: 0xf8
 lookup result: got vtd_bus
 vtd_bus: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x56067457a620
  vtd_bus in s->vtd_as_by_busptr: 0x56067598fef0
lookup hash_table again, vtd_bus: 0x5606746aa400
--
YiLiu - vtd_find_add_as()
 bus: 0x5606747f99c0
 s: 0x560675a2e000
 s->vtd_as_by_busptr: 0x560675c521e0, devfn: 0xfa
 lookup result: got vtd_bus
 vtd_bus: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x56067457a620
  vtd_bus in s->vtd_as_by_busptr: 0x56067598fef0
lookup hash_table again, vtd_bus: 0x5606746aa400
--
YiLiu - vtd_find_add_as()
 bus: 0x5606747f99c0
 s: 0x560675a2e000
 s->vtd_as_by_busptr: 0x560675c521e0, devfn: 0xfb
 lookup result: got vtd_bus
 vtd_bus: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x5606746aa400
  vtd_bus in s->vtd_as_by_busptr: 0x56067457a620
  vtd_bus in s->vtd_as_by_busptr: 

Re: [Qemu-devel] [PATCH] replication: interrupt failover if the main device is closed

2016-10-11 Thread Changlong Xie

On 10/07/2016 08:21 PM, Paolo Bonzini wrote:

Without this change, there is a race condition in tests/test-replication.
Depending on how fast the failover job (active commit) runs, there is a
chance of two bad things happening:

1) replication_done can be called after the secondary has been closed
and hence when the BDRVReplicationState is not valid anymore.

2) two copies of the active disk are present during the
/replication/secondary/stop test (that test runs immediately after
/replication/secondary/start, which tests failover).  This causes the
corruption detector to fire.

Signed-off-by: Paolo Bonzini 


Reviewed-by: Changlong Xie 


---
  block/replication.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/block/replication.c b/block/replication.c
index 3bd1cf1..5231a00 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -133,6 +133,9 @@ static void replication_close(BlockDriverState *bs)
  if (s->replication_state == BLOCK_REPLICATION_RUNNING) {
  replication_stop(s->rs, false, NULL);
  }
+if (s->replication_state == BLOCK_REPLICATION_FAILOVER) {
+block_job_cancel_sync(s->active_disk->bs->job);
+}

  if (s->mode == REPLICATION_MODE_SECONDARY) {
  g_free(s->top_id);







[Qemu-devel] [PATCHv2 6/7] spapr_pci: Add a 64-bit MMIO window

2016-10-11 Thread David Gibson
On real hardware, and under pHyp, the PCI host bridges on Power machines
typically advertise two outbound MMIO windows from the guest's physical
memory space to PCI memory space:
  - A 32-bit window which maps onto 2GiB..4GiB in the PCI address space
  - A 64-bit window which maps onto a large region somewhere high in PCI
address space (traditionally this used an identity mapping from guest
physical address to PCI address, but that's not always the case)

The qemu implementation in spapr-pci-host-bridge, however, only supports a
single outbound MMIO window.  At least some Linux versions expect
the two windows, so we arranged this window to map onto the PCI
memory space from 2 GiB..~64 GiB, then advertised it as two contiguous
windows, the "32-bit" window from 2G..4G and the "64-bit" window from
4G..~64G.

This approach means, however, that the 64G window is not naturally aligned.
In turn this limits the size of the largest BAR we can map (which does have
to be naturally aligned) to roughly half of the total window.  With some
large nVidia GPGPU cards which have huge memory BARs, this is starting to
be a problem.

This patch adds true support for separate 32-bit and 64-bit outbound MMIO
windows to the spapr-pci-host-bridge implementation, each of which can
be independently configured.  The 32-bit window always maps to 2G.. in PCI
space, but the PCI address of the 64-bit window can be configured (it
defaults to the same as the guest physical address).

So as not to break possible existing configurations, as long as a 64-bit
window is not specified, a large single window can be specified.  This
will appear the same way to the guest as the old approach, although it's
now implemented by two contiguous memory regions rather than a single one.

For now, this only adds the possibility of 64-bit windows.  The default
configuration still uses the legacy mode.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c  | 12 ++---
 hw/ppc/spapr_pci.c  | 66 -
 include/hw/pci-host/spapr.h |  8 --
 include/hw/ppc/spapr.h  |  3 ++-
 4 files changed, 71 insertions(+), 18 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 7cb167c..764a871 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2372,7 +2372,8 @@ static HotpluggableCPUList 
*spapr_query_hotpluggable_cpus(MachineState *machine)
 
 static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
 uint64_t *buid, hwaddr *pio, hwaddr *pio_size,
-hwaddr *mmio, hwaddr *mmio_size,
+hwaddr *mmio32, hwaddr *mmio32_size,
+hwaddr *mmio64, hwaddr *mmio64_size,
 unsigned n_dma, uint32_t *liobns, Error **errp)
 {
 const uint64_t base_buid = 0x8002000ULL;
@@ -2407,8 +2408,13 @@ static void spapr_phb_placement(sPAPRMachineState 
*spapr, uint32_t index,
 phb_base = phb0_base + index * phb_spacing;
 *pio = phb_base + pio_offset;
 *pio_size = SPAPR_PCI_IO_WIN_SIZE;
-*mmio = phb_base + mmio_offset;
-*mmio_size = SPAPR_PCI_MMIO_WIN_SIZE;
+*mmio32 = phb_base + mmio_offset;
+*mmio32_size = SPAPR_PCI_MMIO_WIN_SIZE;
+/*
+ * We don't set the 64-bit MMIO window, relying on the PHB's
+ * fallback behaviour of automatically splitting a large "32-bit"
+ * window into contiguous 32-bit and 64-bit windows
+ */
 }
 
 static void spapr_machine_class_init(ObjectClass *oc, void *data)
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index c0fc964..442185b 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1317,6 +1317,7 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != 
(uint32_t)-1)
 || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2)
 || (sphb->mem_win_addr != (hwaddr)-1)
+|| (sphb->mem64_win_addr != (hwaddr)-1)
 || (sphb->io_win_addr != (hwaddr)-1)) {
 error_setg(errp, "Either \"index\" or other parameters must"
" be specified for PAPR PHB, not both");
@@ -1326,6 +1327,7 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 smc->phb_placement(spapr, sphb->index,
>buid, >io_win_addr, >io_win_size,
>mem_win_addr, >mem_win_size,
+   >mem64_win_addr, >mem64_win_size,
windows_supported, sphb->dma_liobn, _err);
 if (local_err) {
 error_propagate(errp, local_err);
@@ -1354,6 +1356,38 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
+if (sphb->mem64_win_size != 0) {
+if (sphb->mem64_win_addr == (hwaddr)-1) {
+error_setg(errp,
+  

[Qemu-devel] [PATCHv2 4/7] spapr_pci: Delegate placement of PCI host bridges to machine type

2016-10-11 Thread David Gibson
The 'spapr-pci-host-bridge' represents the virtual PCI host bridge (PHB)
for a PAPR guest.  Unlike on x86, it's routine on Power (both bare metal
and PAPR guests) to have numerous independent PHBs, each controlling a
separate PCI domain.

There are two ways of configuring the spapr-pci-host-bridge device: first
it can be done fully manually, specifying the locations and sizes of all
the IO windows.  This gives the most control, but is very awkward with 6
mandatory parameters.  Alternatively just an "index" can be specified
which essentially selects from an array of predefined PHB locations.
The PHB at index 0 is automatically created as the default PHB.

The current set of default locations causes some problems for guests with
large RAM (> 1 TiB) or PCI devices with very large BARs (e.g. big nVidia
GPGPU cards via VFIO).  Obviously, for migration we can only change the
locations on a new machine type, however.

This is awkward, because the placement is currently decided within the
spapr-pci-host-bridge code, so it breaks abstraction to look inside the
machine type version.

So, this patch delegates the "default mode" PHB placement from the
spapr-pci-host-bridge device back to the machine type via a public method
in sPAPRMachineClass.  It's still a bit ugly, but it's about the best we
can do.

For now, this just changes where the calculation is done.  It doesn't
change the actual location of the host bridges, or any other behaviour.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c  | 34 ++
 hw/ppc/spapr_pci.c  | 22 --
 include/hw/pci-host/spapr.h | 11 +--
 include/hw/ppc/spapr.h  |  4 
 4 files changed, 47 insertions(+), 24 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 03e3803..f6e9c2a 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2370,6 +2370,39 @@ static HotpluggableCPUList 
*spapr_query_hotpluggable_cpus(MachineState *machine)
 return head;
 }
 
+static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
+uint64_t *buid, hwaddr *pio, hwaddr *pio_size,
+hwaddr *mmio, hwaddr *mmio_size,
+unsigned n_dma, uint32_t *liobns, Error **errp)
+{
+const uint64_t base_buid = 0x8002000ULL;
+const hwaddr phb0_base = 0x100ULL; /* 1 TiB */
+const hwaddr phb_spacing = 0x10ULL; /* 64 GiB */
+const hwaddr mmio_offset = 0xa000; /* 2 GiB + 512 MiB */
+const hwaddr pio_offset = 0x8000; /* 2 GiB */
+const uint32_t max_index = 255;
+
+hwaddr phb_base;
+int i;
+
+if (index > max_index) {
+error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
+   max_index);
+return;
+}
+
+*buid = base_buid + index;
+for (i = 0; i < n_dma; ++i) {
+liobns[i] = SPAPR_PCI_LIOBN(index, i);
+}
+
+phb_base = phb0_base + index * phb_spacing;
+*pio = phb_base + pio_offset;
+*pio_size = SPAPR_PCI_IO_WIN_SIZE;
+*mmio = phb_base + mmio_offset;
+*mmio_size = SPAPR_PCI_MMIO_WIN_SIZE;
+}
+
 static void spapr_machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -2406,6 +2439,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
 fwc->get_dev_path = spapr_get_fw_dev_path;
 nc->nmi_monitor_handler = spapr_nmi;
+smc->phb_placement = spapr_phb_placement;
 }
 
 static const TypeInfo spapr_machine_info = {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 4f00865..c0fc964 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1311,7 +1311,8 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
 
 if (sphb->index != (uint32_t)-1) {
-hwaddr windows_base;
+sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+Error *local_err = NULL;
 
 if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != 
(uint32_t)-1)
 || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2)
@@ -1322,21 +1323,14 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-if (sphb->index > SPAPR_PCI_MAX_INDEX) {
-error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
-   SPAPR_PCI_MAX_INDEX);
+smc->phb_placement(spapr, sphb->index,
+   >buid, >io_win_addr, >io_win_size,
+   >mem_win_addr, >mem_win_size,
+   windows_supported, sphb->dma_liobn, _err);
+if (local_err) {
+error_propagate(errp, local_err);
 return;
 }
-
-sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index;
-for (i = 0; i < 

[Qemu-devel] [PATCHv2 7/7] spapr: Improved placement of PCI host bridges in guest memory map

2016-10-11 Thread David Gibson
Currently, the MMIO space for accessing PCI on pseries guests begins at
1 TiB in guest address space.  Each PCI host bridge (PHB) has a 64 GiB
chunk of address space in which it places its outbound PIO and 32-bit and
64-bit MMIO windows.

This scheme has several problems:
  - It limits guest RAM to 1 TiB (though we have a limited fix for this
now)
  - It limits the total MMIO window to 64 GiB.  This is not always enough
for some of the large nVidia GPGPU cards
  - Putting all the windows into a single 64 GiB area means that naturally
aligning things within there will waste more address space.
In addition there was a miscalculation in some of the defaults, which meant
that the MMIO windows for each PHB actually slightly overran the 64 GiB
region for that PHB.  We got away without nasty consequences because
the overrun fit within an unused area at the beginning of the next PHB's
region, but it's not pretty.

This patch implements a new scheme which addresses those problems, and is
also closer to what bare metal hardware and pHyp guests generally use.

Because some guest versions (including most current distro kernels) can't
access PCI MMIO above 64 TiB, we put all the PCI windows between 32 TiB and
64 TiB.  This is broken into 1 TiB chunks.  The first 1 TiB chunk contains
the PIO (64 kiB) and 32-bit MMIO (2 GiB) windows for all of the PHBs.  Each
subsequent TiB chunk contains a naturally aligned 64-bit MMIO window for
one PHB.

This reduces the number of allowed PHBs (without full manual configuration
of all the windows) from 256 to 31, but this should still be plenty in
practice.

We also change some of the default window sizes for manually configured
PHBs to saner values.

Finally we adjust some tests and libqos so that it correctly uses the new
default locations.  Ideally it would parse the device tree given to the
guest, but that's a more complex problem for another time.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c  | 126 ++--
 hw/ppc/spapr_pci.c  |   5 +-
 include/hw/pci-host/spapr.h |   8 ++-
 tests/endianness-test.c |   3 +-
 tests/libqos/pci-spapr.c|   9 ++--
 tests/spapr-phb-test.c  |   2 +-
 6 files changed, 116 insertions(+), 37 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 764a871..d75e2a4 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2376,27 +2376,42 @@ static void spapr_phb_placement(sPAPRMachineState 
*spapr, uint32_t index,
 hwaddr *mmio64, hwaddr *mmio64_size,
 unsigned n_dma, uint32_t *liobns, Error **errp)
 {
+/*
+ * New-style PHB window placement.
+ *
+ * Goals: Gives large (1TiB), naturally aligned 64-bit MMIO window
+ * for each PHB, in addition to 2GiB 32-bit MMIO and 64kiB PIO
+ * windows.
+ *
+ * Some guest kernels can't work with MMIO windows above 1<<46
+ * (64TiB), so we place up to 31 PHBs in the area 32TiB..64TiB
+ *
+ * 32TiB..33TiB contains the PIO and 32-bit MMIO windows for all
+ * PHBs.  33..34TiB has the 64-bit MMIO window for PHB0, 34..35
+ * has the 64-bit window for PHB1 and so forth.
+ */
 const uint64_t base_buid = 0x8002000ULL;
-const hwaddr phb_spacing = 0x10ULL; /* 64 GiB */
-const hwaddr mmio_offset = 0xa000; /* 2 GiB + 512 MiB */
-const hwaddr pio_offset = 0x8000; /* 2 GiB */
-const uint32_t max_index = 255;
-const hwaddr phb0_alignment = 0x100ULL; /* 1 TiB */
+const hwaddr mmio64_win_size = (1ULL << 40); /* 1 TiB */
 
-uint64_t ram_top = MACHINE(spapr)->ram_size;
-hwaddr phb0_base, phb_base;
+int max_phbs = (SPAPR_PCI_LIMIT - SPAPR_PCI_BASE) / mmio64_win_size - 1;
+hwaddr mmio32_base = SPAPR_PCI_BASE + SPAPR_PCI_MEM32_WIN_SIZE;
+hwaddr mmio64_base = SPAPR_PCI_BASE + mmio64_win_size;
 int i;
 
-if (MACHINE(spapr)->maxram_size > ram_top) {
-ram_top = spapr->hotplug_memory.base +
-memory_region_size(>hotplug_memory.mr);
-}
-
-phb0_base = QEMU_ALIGN_UP(ram_top, phb0_alignment);
-
-if (index > max_index) {
+/* Sanity check natural alignments */
+assert((SPAPR_PCI_BASE % mmio64_win_size) == 0);
+assert((SPAPR_PCI_LIMIT % mmio64_win_size) == 0);
+assert((mmio64_win_size % SPAPR_PCI_MEM32_WIN_SIZE) == 0);
+assert((SPAPR_PCI_MEM32_WIN_SIZE % SPAPR_PCI_IO_WIN_SIZE) == 0);
+/* Sanity check bounds */
+assert((SPAPR_PCI_BASE + max_phbs * SPAPR_PCI_IO_WIN_SIZE)
+   <= mmio32_base);
+assert(mmio32_base + max_phbs * SPAPR_PCI_MEM32_WIN_SIZE
+   <= mmio64_base);
+
+if (index >= max_phbs) {
 error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
-   max_index);
+   max_phbs - 1);
 return;
 }
 
@@ -2405,16 +2420,14 @@ static void spapr_phb_placement(sPAPRMachineState 
*spapr, uint32_t index,
   

[Qemu-devel] [PATCHv2 5/7] spapr: Adjust placement of PCI host bridge to allow > 1TiB RAM

2016-10-11 Thread David Gibson
Currently the default PCI host bridge for the 'pseries' machine type is
constructed with its IO windows in the 1TiB..(1TiB + 64GiB) range in
guest memory space.  This means that if > 1TiB of guest RAM is specified,
the RAM will collide with the PCI IO windows, causing serious problems.

Problems won't be obvious until guest RAM goes a bit beyond 1TiB, because
there's a little unused space at the bottom of the area reserved for PCI,
but essentially this means that > 1TiB of RAM has never worked with the
pseries machine type.

This patch fixes this by altering the placement of PHBs on large-RAM VMs.
Instead of always placing the first PHB at 1TiB, it is placed at the next
1 TiB boundary after the maximum RAM address.

Technically, this changes behaviour in a migration-breaking way for
existing machines with > 1TiB maximum memory, but since having > 1 TiB
memory was broken anyway, this seems like a reasonable trade-off.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f6e9c2a..7cb167c 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2376,15 +2376,23 @@ static void spapr_phb_placement(sPAPRMachineState 
*spapr, uint32_t index,
 unsigned n_dma, uint32_t *liobns, Error **errp)
 {
 const uint64_t base_buid = 0x8002000ULL;
-const hwaddr phb0_base = 0x100ULL; /* 1 TiB */
 const hwaddr phb_spacing = 0x10ULL; /* 64 GiB */
 const hwaddr mmio_offset = 0xa000; /* 2 GiB + 512 MiB */
 const hwaddr pio_offset = 0x8000; /* 2 GiB */
 const uint32_t max_index = 255;
+const hwaddr phb0_alignment = 0x100ULL; /* 1 TiB */
 
-hwaddr phb_base;
+uint64_t ram_top = MACHINE(spapr)->ram_size;
+hwaddr phb0_base, phb_base;
 int i;
 
+if (MACHINE(spapr)->maxram_size > ram_top) {
+ram_top = spapr->hotplug_memory.base +
+memory_region_size(>hotplug_memory.mr);
+}
+
+phb0_base = QEMU_ALIGN_UP(ram_top, phb0_alignment);
+
 if (index > max_index) {
 error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
max_index);
-- 
2.7.4




[Qemu-devel] [PATCHv2 3/7] libqos: Limit spapr-pci to 32-bit MMIO for now

2016-10-11 Thread David Gibson
Currently the functions in pci-spapr.c (like pci-pc.c on which it's based)
don't distinguish between 32-bit and 64-bit PCI MMIO.  At the moment, the
qemu side implementation is a bit weird and has a single MMIO window
straddling 32-bit and 64-bit regions, but we're likely to change that in
future.

In any case, pci-pc.c - and therefore the testcases using PCI - only handle
32-bit MMIOs for now.  For spapr despite whatever changes might happen with
the MMIO windows, the 32-bit window is likely to remain at 2..4 GiB in PCI
space.

So, explicitly limit pci-spapr.c to 32-bit MMIOs for now, we can add 64-bit
MMIO support back in when and if we need it.

Signed-off-by: David Gibson 
---
 tests/libqos/pci-spapr.c | 32 +++-
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/tests/libqos/pci-spapr.c b/tests/libqos/pci-spapr.c
index 3192903..558dfc3 100644
--- a/tests/libqos/pci-spapr.c
+++ b/tests/libqos/pci-spapr.c
@@ -32,8 +32,8 @@ typedef struct QPCIBusSPAPR {
 uint64_t pio_cpu_base;
 QPCIWindow pio;
 
-uint64_t mmio_cpu_base;
-QPCIWindow mmio;
+uint64_t mmio32_cpu_base;
+QPCIWindow mmio32;
 
 uint64_t pci_hole_start;
 uint64_t pci_hole_size;
@@ -58,7 +58,7 @@ static uint8_t qpci_spapr_io_readb(QPCIBus *bus, void *addr)
 if (port < s->pio.size) {
 v = readb(s->pio_cpu_base + port);
 } else {
-v = readb(s->mmio_cpu_base + port);
+v = readb(s->mmio32_cpu_base + port);
 }
 return v;
 }
@@ -71,7 +71,7 @@ static uint16_t qpci_spapr_io_readw(QPCIBus *bus, void *addr)
 if (port < s->pio.size) {
 v = readw(s->pio_cpu_base + port);
 } else {
-v = readw(s->mmio_cpu_base + port);
+v = readw(s->mmio32_cpu_base + port);
 }
 return bswap16(v);
 }
@@ -84,7 +84,7 @@ static uint32_t qpci_spapr_io_readl(QPCIBus *bus, void *addr)
 if (port < s->pio.size) {
 v = readl(s->pio_cpu_base + port);
 } else {
-v = readl(s->mmio_cpu_base + port);
+v = readl(s->mmio32_cpu_base + port);
 }
 return bswap32(v);
 }
@@ -96,7 +96,7 @@ static void qpci_spapr_io_writeb(QPCIBus *bus, void *addr, 
uint8_t value)
 if (port < s->pio.size) {
 writeb(s->pio_cpu_base + port, value);
 } else {
-writeb(s->mmio_cpu_base + port, value);
+writeb(s->mmio32_cpu_base + port, value);
 }
 }
 
@@ -108,7 +108,7 @@ static void qpci_spapr_io_writew(QPCIBus *bus, void *addr, 
uint16_t value)
 if (port < s->pio.size) {
 writew(s->pio_cpu_base + port, value);
 } else {
-writew(s->mmio_cpu_base + port, value);
+writew(s->mmio32_cpu_base + port, value);
 }
 }
 
@@ -120,7 +120,7 @@ static void qpci_spapr_io_writel(QPCIBus *bus, void *addr, 
uint32_t value)
 if (port < s->pio.size) {
 writel(s->pio_cpu_base + port, value);
 } else {
-writel(s->mmio_cpu_base + port, value);
+writel(s->mmio32_cpu_base + port, value);
 }
 }
 
@@ -235,12 +235,9 @@ static void qpci_spapr_iounmap(QPCIBus *bus, void *data)
 /* FIXME */
 }
 
-#define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x8000ULL
 #define SPAPR_PCI_WINDOW_BASE0x100ULL
-#define SPAPR_PCI_WINDOW_SPACING 0x10ULL
-#define SPAPR_PCI_MMIO_WIN_OFF   0xA000
-#define SPAPR_PCI_MMIO_WIN_SIZE  (SPAPR_PCI_WINDOW_SPACING - \
- SPAPR_PCI_MEM_WIN_BUS_OFFSET)
+#define SPAPR_PCI_MMIO32_WIN_OFF 0xA000
+#define SPAPR_PCI_MMIO32_WIN_SIZE0x8000 /* 2 GiB */
 #define SPAPR_PCI_IO_WIN_OFF 0x8000
 #define SPAPR_PCI_IO_WIN_SIZE0x1
 
@@ -280,13 +277,14 @@ QPCIBus *qpci_init_spapr(QGuestAllocator *alloc)
 ret->pio.pci_base = 0;
 ret->pio.size = SPAPR_PCI_IO_WIN_SIZE;
 
-ret->mmio_cpu_base = SPAPR_PCI_WINDOW_BASE + SPAPR_PCI_MMIO_WIN_OFF;
-ret->mmio.pci_base = SPAPR_PCI_MEM_WIN_BUS_OFFSET;
-ret->mmio.size = SPAPR_PCI_MMIO_WIN_SIZE;
+/* 32-bit portion of the MMIO window is at PCI address 2..4 GiB */
+ret->mmio32_cpu_base = SPAPR_PCI_WINDOW_BASE + SPAPR_PCI_MMIO32_WIN_OFF;
+ret->mmio32.pci_base = 0x8000; /* 2 GiB */
+ret->mmio32.size = SPAPR_PCI_MMIO32_WIN_SIZE;
 
 ret->pci_hole_start = 0xC000;
 ret->pci_hole_size =
-ret->mmio.pci_base + ret->mmio.size - ret->pci_hole_start;
+ret->mmio32.pci_base + ret->mmio32.size - ret->pci_hole_start;
 ret->pci_hole_alloc = 0;
 
 ret->pci_iohole_start = 0xc000;
-- 
2.7.4




Re: [Qemu-devel] [PATCH 0/7] IGNORE, SORRY (was: Improve PCI IO window orgnaization for pseries)

2016-10-11 Thread David Gibson
On Wed, Oct 12, 2016 at 03:29:45PM +1100, David Gibson wrote:
> The current way we organize the IO windows into PCI space for the
> pseries machine type has several problems.
> 
>   - It makes it difficult to create very large MMIO spaces which is
> necessary for certain PCI devices with very large BARs.  This
> problem has been known for a while.
> 
>   - More recently we discovered a more serious problem: it prevents
> more than 1TiB of RAM being added to a pseries guest.
> 
>   - It doesn't make very efficient use of address space.
> 
> Fixing this is complicated by keeping migration from old versions
> working and working out what things belong on which side of the
> abstraction barrier between the machine type and the host bridge
> device.
> 
> This series addresses all these problems.  Patches 1-3/7 perform
> preliminary cleanups to the spapr specific PCI test code, which we'll
> need to get the tests working with the changed implementation.  4-5/7
> represent a minimal fix for the most serious problem (the 1 TiB limit)
> - once polished, I'll consider submitting these for the stable branch.
> 6-7/7 complete a more comprehensive fix.

Sorry, realised I stupidly left a debug fprintf in there, and also got
one of the recipient emails wrong.  New spin coming momentarily.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[Qemu-devel] [PATCHv2 1/7] libqos: Isolate knowledge of spapr memory map to qpci_init_spapr()

2016-10-11 Thread David Gibson
The libqos code for accessing PCI on the spapr machine type uses IOBASE()
and MMIOBASE() macros to determine the address in the CPU memory map of
the windows to PCI address space.

This is a detail of the implementation of PCI in the machine type, it's not
specified by the PAPR standard.  Real guests would get the addresses of the
PCI windows from the device tree.

Finding the device tree in libqos would be awkward, but we can at least
localize this knowledge of the implementation to the init function, saving
it in the QPCIBusSPAPR structure for use by the accessors.

That leaves only one place to fix if we alter the location of the PCI
windows, as we're planning to do.

Signed-off-by: David Gibson 
---
 tests/libqos/pci-spapr.c | 113 +++
 1 file changed, 64 insertions(+), 49 deletions(-)

diff --git a/tests/libqos/pci-spapr.c b/tests/libqos/pci-spapr.c
index 2f73bad..1765a54 100644
--- a/tests/libqos/pci-spapr.c
+++ b/tests/libqos/pci-spapr.c
@@ -18,30 +18,23 @@
 
 /* From include/hw/pci-host/spapr.h */
 
-#define SPAPR_PCI_BASE_BUID  0x8002000ULL
-
-#define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x8000ULL
-
-#define SPAPR_PCI_WINDOW_BASE0x100ULL
-#define SPAPR_PCI_WINDOW_SPACING 0x10ULL
-#define SPAPR_PCI_MMIO_WIN_OFF   0xA000
-#define SPAPR_PCI_MMIO_WIN_SIZE  (SPAPR_PCI_WINDOW_SPACING - \
- SPAPR_PCI_MEM_WIN_BUS_OFFSET)
-#define SPAPR_PCI_IO_WIN_OFF 0x8000
-#define SPAPR_PCI_IO_WIN_SIZE0x1
-
-/* index is the phb index */
-
-#define BUIDBASE(index)  (SPAPR_PCI_BASE_BUID + (index))
-#define PCIBASE(index)   (SPAPR_PCI_WINDOW_BASE + \
-  (index) * SPAPR_PCI_WINDOW_SPACING)
-#define IOBASE(index)(PCIBASE(index) + SPAPR_PCI_IO_WIN_OFF)
-#define MMIOBASE(index)  (PCIBASE(index) + SPAPR_PCI_MMIO_WIN_OFF)
+typedef struct QPCIWindow {
+uint64_t pci_base;/* window address in PCI space */
+uint64_t size;/* window size */
+} QPCIWindow;
 
 typedef struct QPCIBusSPAPR {
 QPCIBus bus;
 QGuestAllocator *alloc;
 
+uint64_t buid;
+
+uint64_t pio_cpu_base;
+QPCIWindow pio;
+
+uint64_t mmio_cpu_base;
+QPCIWindow mmio;
+
 uint64_t pci_hole_start;
 uint64_t pci_hole_size;
 uint64_t pci_hole_alloc;
@@ -59,69 +52,75 @@ typedef struct QPCIBusSPAPR {
 
 static uint8_t qpci_spapr_io_readb(QPCIBus *bus, void *addr)
 {
+QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus);
 uint64_t port = (uintptr_t)addr;
 uint8_t v;
-if (port < SPAPR_PCI_IO_WIN_SIZE) {
-v = readb(IOBASE(0) + port);
+if (port < s->pio.size) {
+v = readb(s->pio_cpu_base + port);
 } else {
-v = readb(MMIOBASE(0) + port);
+v = readb(s->mmio_cpu_base + port);
 }
 return v;
 }
 
 static uint16_t qpci_spapr_io_readw(QPCIBus *bus, void *addr)
 {
+QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus);
 uint64_t port = (uintptr_t)addr;
 uint16_t v;
-if (port < SPAPR_PCI_IO_WIN_SIZE) {
-v = readw(IOBASE(0) + port);
+if (port < s->pio.size) {
+v = readw(s->pio_cpu_base + port);
 } else {
-v = readw(MMIOBASE(0) + port);
+v = readw(s->mmio_cpu_base + port);
 }
 return bswap16(v);
 }
 
 static uint32_t qpci_spapr_io_readl(QPCIBus *bus, void *addr)
 {
+QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus);
 uint64_t port = (uintptr_t)addr;
 uint32_t v;
-if (port < SPAPR_PCI_IO_WIN_SIZE) {
-v = readl(IOBASE(0) + port);
+if (port < s->pio.size) {
+v = readl(s->pio_cpu_base + port);
 } else {
-v = readl(MMIOBASE(0) + port);
+v = readl(s->mmio_cpu_base + port);
 }
 return bswap32(v);
 }
 
 static void qpci_spapr_io_writeb(QPCIBus *bus, void *addr, uint8_t value)
 {
+QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus);
 uint64_t port = (uintptr_t)addr;
-if (port < SPAPR_PCI_IO_WIN_SIZE) {
-writeb(IOBASE(0) + port, value);
+if (port < s->pio.size) {
+writeb(s->pio_cpu_base + port, value);
 } else {
-writeb(MMIOBASE(0) + port, value);
+writeb(s->mmio_cpu_base + port, value);
 }
 }
 
 static void qpci_spapr_io_writew(QPCIBus *bus, void *addr, uint16_t value)
 {
+QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus);
 uint64_t port = (uintptr_t)addr;
 value = bswap16(value);
-if (port < SPAPR_PCI_IO_WIN_SIZE) {
-writew(IOBASE(0) + port, value);
+if (port < s->pio.size) {
+writew(s->pio_cpu_base + port, value);
 } else {
-writew(MMIOBASE(0) + port, value);
+writew(s->mmio_cpu_base + port, value);
 }
 }
 
 static void qpci_spapr_io_writel(QPCIBus *bus, void *addr, uint32_t value)
 {
+QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus);
 

[Qemu-devel] x86 Instruction Testing?

2016-10-11 Thread Taylor Edward Heimbichner
Hello,

I'm a student researcher with the University of Arizona's Computer Science
Department. I'm writing to ask if any executable that you use to test QEMU
systematically tests all, or at least a large portion, of the x86
instruction set? We're working on a project that involves dynamic taint
analysis and would like to use an executable like this to comprehensively
test our work. Thank you.

Sincerely,
Taylor Heimbichner

University of Arizona Computer Science Department


[Qemu-devel] [Bug 1319100] Re: qemu-arm-static bug in signal handling causes mono and java to hang

2016-10-11 Thread Chris L
Did this fix end up making it into QEMU 2.7?

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1319100

Title:
  qemu-arm-static bug in signal handling causes mono and java to hang

Status in QEMU:
  Fix Committed
Status in qemu-kvm package in Ubuntu:
  Confirmed
Status in qemu-kvm package in Debian:
  Confirmed

Bug description:
  Note, this bug is already reported to debian, but it seems to also affect the 
upstream code.
  https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=748043

  running mono in a chroot environment with qemu-user-static is not possible
  because at least one signal used during termination of mono is routed to the
  host.

  This can be reproduced by:
  debootstrap --include=mono-runtime --foreign --arch=armel "wheezy" 
"mono-test" "http://ftp.de.debian.org//debian"
  cp /usr/bin/qemu-arm-static mono-test/usr/bin
  mount -t proc none mono-test/proc
  mount -o bind /dev mono-test/dev
  mount -o bind /sys mono-test/sys
  chroot mono-test
  ../debootstrap/debootstrap --second-stage
  exit
  mount -t proc none mono-test/proc
  mount -o bind /sys mono-test/sys
  chroot mono-test
  QEMU_STRACE=1 /usr/bin/mono /usr/lib/mono/4.0/gacutil.exe

  This will block on a futex:

  --8<--
  18663 sched_yield(0,0,2582980,0,0,2582928) = 0
  18663 clock_gettime(1,-150996384,2,1,2585016,2585600) = 0
  18663 tgkill(18663,18664,30,18664,30,-161951744) = 0
  18663 futex(0x00293774,FUTEX_PRIVATE_FLAG|FUTEX_WAIT,0,NULL,NULL,0)
  --8<--

  If you run mono under strace on a native x86 box, you can see that signals
  between threads are used during termination:

  strace -f -o log.txt /usr/bin/mono /usr/lib/mono/4.0/gacutil.exe

  --8<--
  14075 sched_yield() = 0   
  
  14075 tgkill(14075, 14083, SIGPWR)  = 0   
  
  14075 futex(0x983f00, FUTEX_WAIT_PRIVATE, 0, NULL 
  
  14083 <... futex resumed> ) = ? ERESTARTSYS (To be restarted) 
  
  14083 --- SIGPWR (Power failure) @ 0 (0) ---  
  
  14083 futex(0x983f00, FUTEX_WAKE_PRIVATE, 1) = 1  
  
  14075 <... futex resumed> ) = 0   
  
  14083 rt_sigsuspend(~[INT QUIT ABRT TERM XCPU RTMIN RT_1] 
  
  14075 futex(0x94d9a4, FUTEX_CMP_REQUEUE_PRIVATE, 1, 2147483647, 0x94da20, 24) 
= 3
  14078 <... futex resumed> ) = 0   
  
  14078 futex(0x94da20, FUTEX_WAKE_PRIVATE, 1) = 1  
  
  14077 <... futex resumed> ) = 0   
  
  14075 futex(0x94d9a4, FUTEX_CMP_REQUEUE_PRIVATE, 1, 2147483647, 0x94da20, 26 

  --8<--

  This also blocks the installation of libnunit2.6-cil within a armel chroot,
  because it uses mono in its postinst script.
  E.g. (/usr/bin/mono /usr/share/mono/MonoGetAssemblyName.exe 
/usr/lib/cli/nunit.core-2.6/nunit.core.dll)

  Obviously the same as described in:
  http://lists.opensuse.org/opensuse-arm/2011-12/msg0.html
  is happening here.

  There is an openSuSE patch against qemu:
  
https://build.opensuse.org/package/view_file/Virtualization:Qemu/qemu/0002-XXX-work-around-SA_RESTART-race-wit.patch?expand=1

  This patch also applies against qemu from backports-wheezy and resolves this
  issue.

  As it seems that this issue is not Debian-specific, I will also report it to
  the qemu project and reference this bug report.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1319100/+subscriptions



Re: [Qemu-devel] [PATCH] char: serial: check divider value against baud base

2016-10-11 Thread Huawei PSIRT
Dear,

Thank you very much for your support. And will you assign a CVE-ID for the
issue?

Best regards,
Huawei PSIRT

-邮件原件-
发件人: P J P [mailto:ppan...@redhat.com] 
发送时间: 2016年10月12日 0:28
收件人: Qemu Developers
抄送: Michael S. Tsirkin; Paolo Bonzini; Huawei PSIRT; Prasad J Pandit
主题: [PATCH] char: serial: check divider value against baud base

From: Prasad J Pandit 

16550A UART device uses an oscillator to generate frequencies (baud base),
which decide communication speed. This speed could be changed by dividing it
by a divider. If the divider is greater than the baud base, speed is set to
zero, leading to a divide by zero error. Add check to avoid it.

Reported-by: Huawei PSIRT 
Signed-off-by: Prasad J Pandit 
---
 hw/char/serial.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/char/serial.c b/hw/char/serial.c index 3442f47..f659bbd
100644
--- a/hw/char/serial.c
+++ b/hw/char/serial.c
@@ -153,7 +153,7 @@ static void serial_update_parameters(SerialState *s)
 int speed, parity, data_bits, stop_bits, frame_size;
 QEMUSerialSetParams ssp;
 
-if (s->divider == 0)
+if (s->divider == 0 || s->divider > s->baudbase)
 return;
 
 /* Start bit. */
--
2.5.5




[Qemu-devel] [PATCHv2 0/7] Improve PCI IO window orgnaization for pseries

2016-10-11 Thread David Gibson
The current way we organize the IO windows into PCI space for the
pseries machine type has several problems.

  - It makes it difficult to create very large MMIO spaces which is
necessary for certain PCI devices with very large BARs.  This
problem has been known for a while.

  - More recently we discovered a more serious problem: it prevents
more than 1TiB of RAM being added to a pseries guest.

  - It doesn't make very efficient use of address space.

Fixing this is complicated by keeping migration from old versions
working and working out what things belong on which side of the
abstraction barrier between the machine type and the host bridge
device.

This series addresses all these problems.  Patches 1-3/7 perform
preliminary cleanups to the spapr specific PCI test code, which we'll
need to get the tests working with the changed implementation.  4-5/7
represent a minimal fix for the most serious problem (the 1 TiB limit)
- once polished, I'll consider submitting these for the stable branch.
6-7/7 complete a more comprehensive fix.

Changes since v1:
  * Removed a debugging printf()
Changes since RFC:
  * Rebase
  * Fixed some bugs
  * Fixed up PCI testcases which were broken by the change (due to
test limitations)
  * Seriously contemplated, then rejected a completely different
approach

David Gibson (7):
  libqos: Isolate knowledge of spapr memory map to qpci_init_spapr()
  libqos: Correct error in PCI hole sizing for spapr
  libqos: Limit spapr-pci to 32-bit MMIO for now
  spapr_pci: Delegate placement of PCI host bridges to machine type
  spapr: Adjust placement of PCI host bridge to allow > 1TiB RAM
  spapr_pci: Add a 64-bit MMIO window
  spapr: Improved placement of PCI host bridges in guest memory map

 hw/ppc/spapr.c  | 124 +++-
 hw/ppc/spapr_pci.c  |  91 ++--
 include/hw/pci-host/spapr.h |  25 -
 include/hw/ppc/spapr.h  |   5 ++
 tests/endianness-test.c |   3 +-
 tests/libqos/pci-spapr.c| 116 +++--
 tests/spapr-phb-test.c  |   2 +-
 7 files changed, 272 insertions(+), 94 deletions(-)

-- 
2.7.4




[Qemu-devel] [PATCHv2 2/7] libqos: Correct error in PCI hole sizing for spapr

2016-10-11 Thread David Gibson
In pci-spapr.c (as in pci-pc.c from which it was derived), the
pci_hole_start/pci_hole_size and pci_iohole_start/pci_iohole_size pairs[1]
essentially define the region of PCI (not CPU) addresses in which MMIO
or PIO BARs respectively will be allocated.

The size value is relative to the start value.  But in pci-spapr.c it is
set to the entire size of the window supported by the (emulated) hardware,
but the start values are *not* at the beginning of the emulated windows.

That means if you tried to map enough PCI BARs, we'd messily overrun the
IO windows, instead of failing in iomap as we should.

This patch corrects this by calculating the hole sizes from the location
of the window in PCI space and the hole start.

[1] Those are bad names, but that's a problem for another time.

Signed-off-by: David Gibson 
---
 tests/libqos/pci-spapr.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/libqos/pci-spapr.c b/tests/libqos/pci-spapr.c
index 1765a54..3192903 100644
--- a/tests/libqos/pci-spapr.c
+++ b/tests/libqos/pci-spapr.c
@@ -285,11 +285,13 @@ QPCIBus *qpci_init_spapr(QGuestAllocator *alloc)
 ret->mmio.size = SPAPR_PCI_MMIO_WIN_SIZE;
 
 ret->pci_hole_start = 0xC000;
-ret->pci_hole_size = SPAPR_PCI_MMIO_WIN_SIZE;
+ret->pci_hole_size =
+ret->mmio.pci_base + ret->mmio.size - ret->pci_hole_start;
 ret->pci_hole_alloc = 0;
 
 ret->pci_iohole_start = 0xc000;
-ret->pci_iohole_size = SPAPR_PCI_IO_WIN_SIZE;
+ret->pci_iohole_size =
+ret->pio.pci_base + ret->pio.size - ret->pci_iohole_start;
 ret->pci_iohole_alloc = 0;
 
 return >bus;
-- 
2.7.4




[Qemu-devel] [PATCH v3] block/replication: Clarify 'top-id' parameter usage

2016-10-11 Thread Changlong Xie
The replication driver only supports the 'top-id' parameter for the
secondary side; it must not be supplied for the primary side.

Reviewed-by: Eric Blake 
Signed-off-by: Changlong Xie 
---
 block/replication.c  | 5 +
 qapi/block-core.json | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/block/replication.c b/block/replication.c
index 3bd1cf1..8bbfc8f 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -101,6 +101,11 @@ static int replication_open(BlockDriverState *bs, QDict 
*options,
 
 if (!strcmp(mode, "primary")) {
 s->mode = REPLICATION_MODE_PRIMARY;
+top_id = qemu_opt_get(opts, REPLICATION_TOP_ID);
+if (top_id) {
+error_setg(_err, "The primary side does not support option 
top-id");
+goto fail;
+}
 } else if (!strcmp(mode, "secondary")) {
 s->mode = REPLICATION_MODE_SECONDARY;
 top_id = qemu_opt_get(opts, REPLICATION_TOP_ID);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 1b7aa1b..74b5c69 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2197,7 +2197,8 @@
 # @mode: the replication mode
 #
 # @top-id: #optional In secondary mode, node name or device ID of the root
-#  node who owns the replication node chain. Ignored in primary mode.
+#  node who owns the replication node chain. Must not be given in
+#  primary mode.
 #
 # Since: 2.8
 ##
-- 
1.9.3






[Qemu-devel] [PATCH 1/7] libqos: Isolate knowledge of spapr memory map to qpci_init_spapr()

2016-10-11 Thread David Gibson
The libqos code for accessing PCI on the spapr machine type uses IOBASE()
and MMIOBASE() macros to determine the address in the CPU memory map of
the windows to PCI address space.

This is a detail of the implementation of PCI in the machine type, it's not
specified by the PAPR standard.  Real guests would get the addresses of the
PCI windows from the device tree.

Finding the device tree in libqos would be awkward, but we can at least
localize this knowledge of the implementation to the init function, saving
it in the QPCIBusSPAPR structure for use by the accessors.

That leaves only one place to fix if we alter the location of the PCI
windows, as we're planning to do.

Signed-off-by: David Gibson 
---
 tests/libqos/pci-spapr.c | 113 +++
 1 file changed, 64 insertions(+), 49 deletions(-)

diff --git a/tests/libqos/pci-spapr.c b/tests/libqos/pci-spapr.c
index 2f73bad..1765a54 100644
--- a/tests/libqos/pci-spapr.c
+++ b/tests/libqos/pci-spapr.c
@@ -18,30 +18,23 @@
 
 /* From include/hw/pci-host/spapr.h */
 
-#define SPAPR_PCI_BASE_BUID  0x8002000ULL
-
-#define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x8000ULL
-
-#define SPAPR_PCI_WINDOW_BASE0x100ULL
-#define SPAPR_PCI_WINDOW_SPACING 0x10ULL
-#define SPAPR_PCI_MMIO_WIN_OFF   0xA000
-#define SPAPR_PCI_MMIO_WIN_SIZE  (SPAPR_PCI_WINDOW_SPACING - \
- SPAPR_PCI_MEM_WIN_BUS_OFFSET)
-#define SPAPR_PCI_IO_WIN_OFF 0x8000
-#define SPAPR_PCI_IO_WIN_SIZE0x1
-
-/* index is the phb index */
-
-#define BUIDBASE(index)  (SPAPR_PCI_BASE_BUID + (index))
-#define PCIBASE(index)   (SPAPR_PCI_WINDOW_BASE + \
-  (index) * SPAPR_PCI_WINDOW_SPACING)
-#define IOBASE(index)(PCIBASE(index) + SPAPR_PCI_IO_WIN_OFF)
-#define MMIOBASE(index)  (PCIBASE(index) + SPAPR_PCI_MMIO_WIN_OFF)
+typedef struct QPCIWindow {
+uint64_t pci_base;/* window address in PCI space */
+uint64_t size;/* window size */
+} QPCIWindow;
 
 typedef struct QPCIBusSPAPR {
 QPCIBus bus;
 QGuestAllocator *alloc;
 
+uint64_t buid;
+
+uint64_t pio_cpu_base;
+QPCIWindow pio;
+
+uint64_t mmio_cpu_base;
+QPCIWindow mmio;
+
 uint64_t pci_hole_start;
 uint64_t pci_hole_size;
 uint64_t pci_hole_alloc;
@@ -59,69 +52,75 @@ typedef struct QPCIBusSPAPR {
 
 static uint8_t qpci_spapr_io_readb(QPCIBus *bus, void *addr)
 {
+QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus);
 uint64_t port = (uintptr_t)addr;
 uint8_t v;
-if (port < SPAPR_PCI_IO_WIN_SIZE) {
-v = readb(IOBASE(0) + port);
+if (port < s->pio.size) {
+v = readb(s->pio_cpu_base + port);
 } else {
-v = readb(MMIOBASE(0) + port);
+v = readb(s->mmio_cpu_base + port);
 }
 return v;
 }
 
 static uint16_t qpci_spapr_io_readw(QPCIBus *bus, void *addr)
 {
+QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus);
 uint64_t port = (uintptr_t)addr;
 uint16_t v;
-if (port < SPAPR_PCI_IO_WIN_SIZE) {
-v = readw(IOBASE(0) + port);
+if (port < s->pio.size) {
+v = readw(s->pio_cpu_base + port);
 } else {
-v = readw(MMIOBASE(0) + port);
+v = readw(s->mmio_cpu_base + port);
 }
 return bswap16(v);
 }
 
 static uint32_t qpci_spapr_io_readl(QPCIBus *bus, void *addr)
 {
+QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus);
 uint64_t port = (uintptr_t)addr;
 uint32_t v;
-if (port < SPAPR_PCI_IO_WIN_SIZE) {
-v = readl(IOBASE(0) + port);
+if (port < s->pio.size) {
+v = readl(s->pio_cpu_base + port);
 } else {
-v = readl(MMIOBASE(0) + port);
+v = readl(s->mmio_cpu_base + port);
 }
 return bswap32(v);
 }
 
 static void qpci_spapr_io_writeb(QPCIBus *bus, void *addr, uint8_t value)
 {
+QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus);
 uint64_t port = (uintptr_t)addr;
-if (port < SPAPR_PCI_IO_WIN_SIZE) {
-writeb(IOBASE(0) + port, value);
+if (port < s->pio.size) {
+writeb(s->pio_cpu_base + port, value);
 } else {
-writeb(MMIOBASE(0) + port, value);
+writeb(s->mmio_cpu_base + port, value);
 }
 }
 
 static void qpci_spapr_io_writew(QPCIBus *bus, void *addr, uint16_t value)
 {
+QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus);
 uint64_t port = (uintptr_t)addr;
 value = bswap16(value);
-if (port < SPAPR_PCI_IO_WIN_SIZE) {
-writew(IOBASE(0) + port, value);
+if (port < s->pio.size) {
+writew(s->pio_cpu_base + port, value);
 } else {
-writew(MMIOBASE(0) + port, value);
+writew(s->mmio_cpu_base + port, value);
 }
 }
 
 static void qpci_spapr_io_writel(QPCIBus *bus, void *addr, uint32_t value)
 {
+QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus);
 

[Qemu-devel] [PATCH 5/7] spapr: Adjust placement of PCI host bridge to allow > 1TiB RAM

2016-10-11 Thread David Gibson
Currently the default PCI host bridge for the 'pseries' machine type is
constructed with its IO windows in the 1TiB..(1TiB + 64GiB) range in
guest memory space.  This means that if > 1TiB of guest RAM is specified,
the RAM will collide with the PCI IO windows, causing serious problems.

Problems won't be obvious until guest RAM goes a bit beyond 1TiB, because
there's a little unused space at the bottom of the area reserved for PCI,
but essentially this means that > 1TiB of RAM has never worked with the
pseries machine type.

This patch fixes this by altering the placement of PHBs on large-RAM VMs.
Instead of always placing the first PHB at 1TiB, it is placed at the next
1 TiB boundary after the maximum RAM address.

Technically, this changes behaviour in a migration-breaking way for
existing machines with > 1TiB maximum memory, but since having > 1 TiB
memory was broken anyway, this seems like a reasonable trade-off.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f6e9c2a..08a8327 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2376,15 +2376,25 @@ static void spapr_phb_placement(sPAPRMachineState 
*spapr, uint32_t index,
 unsigned n_dma, uint32_t *liobns, Error **errp)
 {
 const uint64_t base_buid = 0x8002000ULL;
-const hwaddr phb0_base = 0x100ULL; /* 1 TiB */
 const hwaddr phb_spacing = 0x10ULL; /* 64 GiB */
 const hwaddr mmio_offset = 0xa000; /* 2 GiB + 512 MiB */
 const hwaddr pio_offset = 0x8000; /* 2 GiB */
 const uint32_t max_index = 255;
+const hwaddr phb0_alignment = 0x100ULL; /* 1 TiB */
 
-hwaddr phb_base;
+uint64_t ram_top = MACHINE(spapr)->ram_size;
+hwaddr phb0_base, phb_base;
 int i;
 
+if (MACHINE(spapr)->maxram_size > ram_top) {
+ram_top = spapr->hotplug_memory.base +
+memory_region_size(>hotplug_memory.mr);
+}
+
+phb0_base = QEMU_ALIGN_UP(ram_top, phb0_alignment);
+
+fprintf(stderr, "DEBUG: ram_top = 0x%016"PRIx64" phb0 @ 
0x%016"HWADDR_PRIX"\n",
+ram_top, phb0_base);
 if (index > max_index) {
 error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
max_index);
-- 
2.7.4




[Qemu-devel] [PATCH 0/7] Improve PCI IO window orgnaization for pseries

2016-10-11 Thread David Gibson
The current way we organize the IO windows into PCI space for the
pseries machine type has several problems.

  - It makes it difficult to create very large MMIO spaces which is
necessary for certain PCI devices with very large BARs.  This
problem has been known for a while.

  - More recently we discovered a more serious problem: it prevents
more than 1TiB of RAM being added to a pseries guest.

  - It doesn't make very efficient use of address space.

Fixing this is complicated by keeping migration from old versions
working and working out what things belong on which side of the
abstraction barrier between the machine type and the host bridge
device.

This series addresses all these problems.  Patches 1-3/7 perform
preliminary cleanups to the spapr specific PCI test code, which we'll
need to get the tests working with the changed implementation.  4-5/7
represent a minimal fix for the most serious problem (the 1 TiB limit)
- once polished, I'll consider submitting these for the stable branch.
6-7/7 complete a more comprehensive fix.

Changes since RFC:
  * Rebase
  * Fixed some bugs
  * Fixed up PCI testcases which were broken by the change (due to
test limitations)
  * Seriously contemplated, then rejected a completely different
approach

David Gibson (7):
  libqos: Isolate knowledge of spapr memory map to qpci_init_spapr()
  libqos: Correct error in PCI hole sizing for spapr
  libqos: Limit spapr-pci to 32-bit MMIO for now
  spapr_pci: Delegate placement of PCI host bridges to machine type
  spapr: Adjust placement of PCI host bridge to allow > 1TiB RAM
  spapr_pci: Add a 64-bit MMIO window
  spapr: Improved placement of PCI host bridges in guest memory map

 hw/ppc/spapr.c  | 126 +++-
 hw/ppc/spapr_pci.c  |  91 ++--
 include/hw/pci-host/spapr.h |  25 -
 include/hw/ppc/spapr.h  |   5 ++
 tests/endianness-test.c |   3 +-
 tests/libqos/pci-spapr.c| 116 ++--
 tests/spapr-phb-test.c  |   2 +-
 7 files changed, 274 insertions(+), 94 deletions(-)

-- 
2.7.4




[Qemu-devel] [PATCH 7/7] spapr: Improved placement of PCI host bridges in guest memory map

2016-10-11 Thread David Gibson
Currently, the MMIO space for accessing PCI on pseries guests begins at
1 TiB in guest address space.  Each PCI host bridge (PHB) has a 64 GiB
chunk of address space in which it places its outbound PIO and 32-bit and
64-bit MMIO windows.

This scheme has several problems:
  - It limits guest RAM to 1 TiB (though we have a limited fix for this
now)
  - It limits the total MMIO window to 64 GiB.  This is not always enough
for some of the large nVidia GPGPU cards
  - Putting all the windows into a single 64 GiB area means that naturally
aligning things within there will waste more address space.
In addition there was a miscalculation in some of the defaults, which meant
that the MMIO windows for each PHB actually slightly overran the 64 GiB
region for that PHB.  We got away without nasty consequences because
the overrun fit within an unused area at the beginning of the next PHB's
region, but it's not pretty.

This patch implements a new scheme which addresses those problems, and is
also closer to what bare metal hardware and pHyp guests generally use.

Because some guest versions (including most current distro kernels) can't
access PCI MMIO above 64 TiB, we put all the PCI windows between 32 TiB and
64 TiB.  This is broken into 1 TiB chunks.  The first 1 TiB chunk contains the PIO
(64 kiB) and 32-bit MMIO (2 GiB) windows for all of the PHBs.  Each
subsequent TiB chunk contains a naturally aligned 64-bit MMIO window for
one PHB each.

This reduces the number of allowed PHBs (without full manual configuration
of all the windows) from 256 to 31, but this should still be plenty in
practice.

We also change some of the default window sizes for manually configured
PHBs to saner values.

Finally we adjust some tests and libqos so that it correctly uses the new
default locations.  Ideally it would parse the device tree given to the
guest, but that's a more complex problem for another time.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c  | 130 ++--
 hw/ppc/spapr_pci.c  |   5 +-
 include/hw/pci-host/spapr.h |   8 ++-
 tests/endianness-test.c |   3 +-
 tests/libqos/pci-spapr.c|   9 ++-
 tests/spapr-phb-test.c  |   2 +-
 6 files changed, 118 insertions(+), 39 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 448f5a8..2f7f8a2 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2376,29 +2376,42 @@ static void spapr_phb_placement(sPAPRMachineState 
*spapr, uint32_t index,
 hwaddr *mmio64, hwaddr *mmio64_size,
 unsigned n_dma, uint32_t *liobns, Error **errp)
 {
+/*
+ * New-style PHB window placement.
+ *
+ * Goals: Gives large (1TiB), naturally aligned 64-bit MMIO window
+ * for each PHB, in addition to 2GiB 32-bit MMIO and 64kiB PIO
+ * windows.
+ *
+ * Some guest kernels can't work with MMIO windows above 1<<46
+ * (64TiB), so we place up to 31 PHBs in the area 32TiB..64TiB
+ *
+ * 32TiB..33TiB contains the PIO and 32-bit MMIO windows for all
+ * PHBs.  33..34TiB has the 64-bit MMIO window for PHB0, 34..35
+ * has the 64-bit window for PHB1 and so forth.
+ */
 const uint64_t base_buid = 0x8002000ULL;
-const hwaddr phb_spacing = 0x10ULL; /* 64 GiB */
-const hwaddr mmio_offset = 0xa000; /* 2 GiB + 512 MiB */
-const hwaddr pio_offset = 0x8000; /* 2 GiB */
-const uint32_t max_index = 255;
-const hwaddr phb0_alignment = 0x100ULL; /* 1 TiB */
+const hwaddr mmio64_win_size = (1ULL << 40); /* 1 TiB */
 
-uint64_t ram_top = MACHINE(spapr)->ram_size;
-hwaddr phb0_base, phb_base;
+int max_phbs = (SPAPR_PCI_LIMIT - SPAPR_PCI_BASE) / mmio64_win_size - 1;
+hwaddr mmio32_base = SPAPR_PCI_BASE + SPAPR_PCI_MEM32_WIN_SIZE;
+hwaddr mmio64_base = SPAPR_PCI_BASE + mmio64_win_size;
 int i;
 
-if (MACHINE(spapr)->maxram_size > ram_top) {
-ram_top = spapr->hotplug_memory.base +
-memory_region_size(>hotplug_memory.mr);
-}
-
-phb0_base = QEMU_ALIGN_UP(ram_top, phb0_alignment);
-
-fprintf(stderr, "DEBUG: ram_top = 0x%016"PRIx64" phb0 @ 
0x%016"HWADDR_PRIX"\n",
-ram_top, phb0_base);
-if (index > max_index) {
+/* Sanity check natural alignments */
+assert((SPAPR_PCI_BASE % mmio64_win_size) == 0);
+assert((SPAPR_PCI_LIMIT % mmio64_win_size) == 0);
+assert((mmio64_win_size % SPAPR_PCI_MEM32_WIN_SIZE) == 0);
+assert((SPAPR_PCI_MEM32_WIN_SIZE % SPAPR_PCI_IO_WIN_SIZE) == 0);
+/* Sanity check bounds */
+assert((SPAPR_PCI_BASE + max_phbs * SPAPR_PCI_IO_WIN_SIZE)
+   <= mmio32_base);
+assert(mmio32_base + max_phbs * SPAPR_PCI_MEM32_WIN_SIZE
+   <= mmio64_base);
+
+if (index >= max_phbs) {
 error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
-   max_index);
+   max_phbs - 1);
 

[Qemu-devel] [PATCH 3/7] libqos: Limit spapr-pci to 32-bit MMIO for now

2016-10-11 Thread David Gibson
Currently the functions in pci-spapr.c (like pci-pc.c on which it's based)
don't distinguish between 32-bit and 64-bit PCI MMIO.  At the moment, the
qemu side implementation is a bit weird and has a single MMIO window
straddling 32-bit and 64-bit regions, but we're likely to change that in
future.

In any case, pci-pc.c - and therefore the testcases using PCI - only handle
32-bit MMIOs for now.  For spapr despite whatever changes might happen with
the MMIO windows, the 32-bit window is likely to remain at 2..4 GiB in PCI
space.

So, explicitly limit pci-spapr.c to 32-bit MMIOs for now, we can add 64-bit
MMIO support back in when and if we need it.

Signed-off-by: David Gibson 
---
 tests/libqos/pci-spapr.c | 32 +++-
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/tests/libqos/pci-spapr.c b/tests/libqos/pci-spapr.c
index 3192903..558dfc3 100644
--- a/tests/libqos/pci-spapr.c
+++ b/tests/libqos/pci-spapr.c
@@ -32,8 +32,8 @@ typedef struct QPCIBusSPAPR {
 uint64_t pio_cpu_base;
 QPCIWindow pio;
 
-uint64_t mmio_cpu_base;
-QPCIWindow mmio;
+uint64_t mmio32_cpu_base;
+QPCIWindow mmio32;
 
 uint64_t pci_hole_start;
 uint64_t pci_hole_size;
@@ -58,7 +58,7 @@ static uint8_t qpci_spapr_io_readb(QPCIBus *bus, void *addr)
 if (port < s->pio.size) {
 v = readb(s->pio_cpu_base + port);
 } else {
-v = readb(s->mmio_cpu_base + port);
+v = readb(s->mmio32_cpu_base + port);
 }
 return v;
 }
@@ -71,7 +71,7 @@ static uint16_t qpci_spapr_io_readw(QPCIBus *bus, void *addr)
 if (port < s->pio.size) {
 v = readw(s->pio_cpu_base + port);
 } else {
-v = readw(s->mmio_cpu_base + port);
+v = readw(s->mmio32_cpu_base + port);
 }
 return bswap16(v);
 }
@@ -84,7 +84,7 @@ static uint32_t qpci_spapr_io_readl(QPCIBus *bus, void *addr)
 if (port < s->pio.size) {
 v = readl(s->pio_cpu_base + port);
 } else {
-v = readl(s->mmio_cpu_base + port);
+v = readl(s->mmio32_cpu_base + port);
 }
 return bswap32(v);
 }
@@ -96,7 +96,7 @@ static void qpci_spapr_io_writeb(QPCIBus *bus, void *addr, 
uint8_t value)
 if (port < s->pio.size) {
 writeb(s->pio_cpu_base + port, value);
 } else {
-writeb(s->mmio_cpu_base + port, value);
+writeb(s->mmio32_cpu_base + port, value);
 }
 }
 
@@ -108,7 +108,7 @@ static void qpci_spapr_io_writew(QPCIBus *bus, void *addr, 
uint16_t value)
 if (port < s->pio.size) {
 writew(s->pio_cpu_base + port, value);
 } else {
-writew(s->mmio_cpu_base + port, value);
+writew(s->mmio32_cpu_base + port, value);
 }
 }
 
@@ -120,7 +120,7 @@ static void qpci_spapr_io_writel(QPCIBus *bus, void *addr, 
uint32_t value)
 if (port < s->pio.size) {
 writel(s->pio_cpu_base + port, value);
 } else {
-writel(s->mmio_cpu_base + port, value);
+writel(s->mmio32_cpu_base + port, value);
 }
 }
 
@@ -235,12 +235,9 @@ static void qpci_spapr_iounmap(QPCIBus *bus, void *data)
 /* FIXME */
 }
 
-#define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x8000ULL
 #define SPAPR_PCI_WINDOW_BASE0x100ULL
-#define SPAPR_PCI_WINDOW_SPACING 0x10ULL
-#define SPAPR_PCI_MMIO_WIN_OFF   0xA000
-#define SPAPR_PCI_MMIO_WIN_SIZE  (SPAPR_PCI_WINDOW_SPACING - \
- SPAPR_PCI_MEM_WIN_BUS_OFFSET)
+#define SPAPR_PCI_MMIO32_WIN_OFF 0xA000
+#define SPAPR_PCI_MMIO32_WIN_SIZE0x8000 /* 2 GiB */
 #define SPAPR_PCI_IO_WIN_OFF 0x8000
 #define SPAPR_PCI_IO_WIN_SIZE0x1
 
@@ -280,13 +277,14 @@ QPCIBus *qpci_init_spapr(QGuestAllocator *alloc)
 ret->pio.pci_base = 0;
 ret->pio.size = SPAPR_PCI_IO_WIN_SIZE;
 
-ret->mmio_cpu_base = SPAPR_PCI_WINDOW_BASE + SPAPR_PCI_MMIO_WIN_OFF;
-ret->mmio.pci_base = SPAPR_PCI_MEM_WIN_BUS_OFFSET;
-ret->mmio.size = SPAPR_PCI_MMIO_WIN_SIZE;
+/* 32-bit portion of the MMIO window is at PCI address 2..4 GiB */
+ret->mmio32_cpu_base = SPAPR_PCI_WINDOW_BASE + SPAPR_PCI_MMIO32_WIN_OFF;
+ret->mmio32.pci_base = 0x8000; /* 2 GiB */
+ret->mmio32.size = SPAPR_PCI_MMIO32_WIN_SIZE;
 
 ret->pci_hole_start = 0xC000;
 ret->pci_hole_size =
-ret->mmio.pci_base + ret->mmio.size - ret->pci_hole_start;
+ret->mmio32.pci_base + ret->mmio32.size - ret->pci_hole_start;
 ret->pci_hole_alloc = 0;
 
 ret->pci_iohole_start = 0xc000;
-- 
2.7.4




[Qemu-devel] [PATCH 2/7] libqos: Correct error in PCI hole sizing for spapr

2016-10-11 Thread David Gibson
In pci-spapr.c (as in pci-pc.c from which it was derived), the
pci_hole_start/pci_hole_size and pci_iohole_start/pci_iohole_size pairs[1]
essentially define the region of PCI (not CPU) addresses in which MMIO
or PIO BARs respectively will be allocated.

The size value is relative to the start value.  But in pci-spapr.c it is
set to the entire size of the window supported by the (emulated) hardware,
but the start values are *not* at the beginning of the emulated windows.

That means if you tried to map enough PCI BARs, we'd messily overrun the
IO windows, instead of failing in iomap as we should.

This patch corrects this by calculating the hole sizes from the location
of the window in PCI space and the hole start.

[1] Those are bad names, but that's a problem for another time.

Signed-off-by: David Gibson 
---
 tests/libqos/pci-spapr.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/libqos/pci-spapr.c b/tests/libqos/pci-spapr.c
index 1765a54..3192903 100644
--- a/tests/libqos/pci-spapr.c
+++ b/tests/libqos/pci-spapr.c
@@ -285,11 +285,13 @@ QPCIBus *qpci_init_spapr(QGuestAllocator *alloc)
 ret->mmio.size = SPAPR_PCI_MMIO_WIN_SIZE;
 
 ret->pci_hole_start = 0xC000;
-ret->pci_hole_size = SPAPR_PCI_MMIO_WIN_SIZE;
+ret->pci_hole_size =
+ret->mmio.pci_base + ret->mmio.size - ret->pci_hole_start;
 ret->pci_hole_alloc = 0;
 
 ret->pci_iohole_start = 0xc000;
-ret->pci_iohole_size = SPAPR_PCI_IO_WIN_SIZE;
+ret->pci_iohole_size =
+ret->pio.pci_base + ret->pio.size - ret->pci_iohole_start;
 ret->pci_iohole_alloc = 0;
 
 return >bus;
-- 
2.7.4




[Qemu-devel] [PATCH 6/7] spapr_pci: Add a 64-bit MMIO window

2016-10-11 Thread David Gibson
On real hardware, and under pHyp, the PCI host bridges on Power machines
typically advertise two outbound MMIO windows from the guest's physical
memory space to PCI memory space:
  - A 32-bit window which maps onto 2GiB..4GiB in the PCI address space
  - A 64-bit window which maps onto a large region somewhere high in PCI
address space (traditionally this used an identity mapping from guest
physical address to PCI address, but that's not always the case)

The qemu implementation in spapr-pci-host-bridge, however, only supports a
single outbound MMIO window.  At least some Linux versions expect
the two windows, so we arranged this window to map onto the PCI
memory space from 2 GiB..~64 GiB, then advertised it as two contiguous
windows, the "32-bit" window from 2G..4G and the "64-bit" window from
4G..~64G.

This approach means, however, that the 64G window is not naturally aligned.
In turn this limits the size of the largest BAR we can map (which does have
to be naturally aligned) to roughly half of the total window.  With some
large nVidia GPGPU cards which have huge memory BARs, this is starting to
be a problem.

This patch adds true support for separate 32-bit and 64-bit outbound MMIO
windows to the spapr-pci-host-bridge implementation, each of which can
be independently configured.  The 32-bit window always maps to 2G.. in PCI
space, but the PCI address of the 64-bit window can be configured (it
defaults to the same as the guest physical address).

So as not to break possible existing configurations, as long as a 64-bit
window is not specified, a large single window can be specified.  This
will appear the same way to the guest as the old approach, although it's
now implemented by two contiguous memory regions rather than a single one.

For now, this only adds the possibility of 64-bit windows.  The default
configuration still uses the legacy mode.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c  | 12 ++---
 hw/ppc/spapr_pci.c  | 66 -
 include/hw/pci-host/spapr.h |  8 --
 include/hw/ppc/spapr.h  |  3 ++-
 4 files changed, 71 insertions(+), 18 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 08a8327..448f5a8 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2372,7 +2372,8 @@ static HotpluggableCPUList 
*spapr_query_hotpluggable_cpus(MachineState *machine)
 
 static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
 uint64_t *buid, hwaddr *pio, hwaddr *pio_size,
-hwaddr *mmio, hwaddr *mmio_size,
+hwaddr *mmio32, hwaddr *mmio32_size,
+hwaddr *mmio64, hwaddr *mmio64_size,
 unsigned n_dma, uint32_t *liobns, Error **errp)
 {
 const uint64_t base_buid = 0x8002000ULL;
@@ -2409,8 +2410,13 @@ static void spapr_phb_placement(sPAPRMachineState 
*spapr, uint32_t index,
 phb_base = phb0_base + index * phb_spacing;
 *pio = phb_base + pio_offset;
 *pio_size = SPAPR_PCI_IO_WIN_SIZE;
-*mmio = phb_base + mmio_offset;
-*mmio_size = SPAPR_PCI_MMIO_WIN_SIZE;
+*mmio32 = phb_base + mmio_offset;
+*mmio32_size = SPAPR_PCI_MMIO_WIN_SIZE;
+/*
+ * We don't set the 64-bit MMIO window, relying on the PHB's
+ * fallback behaviour of automatically splitting a large "32-bit"
+ * window into contiguous 32-bit and 64-bit windows
+ */
 }
 
 static void spapr_machine_class_init(ObjectClass *oc, void *data)
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index c0fc964..442185b 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1317,6 +1317,7 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != 
(uint32_t)-1)
 || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2)
 || (sphb->mem_win_addr != (hwaddr)-1)
+|| (sphb->mem64_win_addr != (hwaddr)-1)
 || (sphb->io_win_addr != (hwaddr)-1)) {
 error_setg(errp, "Either \"index\" or other parameters must"
" be specified for PAPR PHB, not both");
@@ -1326,6 +1327,7 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 smc->phb_placement(spapr, sphb->index,
>buid, >io_win_addr, >io_win_size,
>mem_win_addr, >mem_win_size,
+   >mem64_win_addr, >mem64_win_size,
windows_supported, sphb->dma_liobn, _err);
 if (local_err) {
 error_propagate(errp, local_err);
@@ -1354,6 +1356,38 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
+if (sphb->mem64_win_size != 0) {
+if (sphb->mem64_win_addr == (hwaddr)-1) {
+error_setg(errp,
+  

[Qemu-devel] [PATCH 4/7] spapr_pci: Delegate placement of PCI host bridges to machine type

2016-10-11 Thread David Gibson
The 'spapr-pci-host-bridge' represents the virtual PCI host bridge (PHB)
for a PAPR guest.  Unlike on x86, it's routine on Power (both bare metal
and PAPR guests) to have numerous independent PHBs, each controlling a
separate PCI domain.

There are two ways of configuring the spapr-pci-host-bridge device: first
it can be done fully manually, specifying the locations and sizes of all
the IO windows.  This gives the most control, but is very awkward with 6
mandatory parameters.  Alternatively just an "index" can be specified
which essentially selects from an array of predefined PHB locations.
The PHB at index 0 is automatically created as the default PHB.

The current set of default locations causes some problems for guests with
large RAM (> 1 TiB) or PCI devices with very large BARs (e.g. big nVidia
GPGPU cards via VFIO).  Obviously, for migration we can only change the
locations on a new machine type, however.

This is awkward, because the placement is currently decided within the
spapr-pci-host-bridge code, so it breaks abstraction to look inside the
machine type version.

So, this patch delegates the "default mode" PHB placement from the
spapr-pci-host-bridge device back to the machine type via a public method
in sPAPRMachineClass.  It's still a bit ugly, but it's about the best we
can do.

For now, this just changes where the calculation is done.  It doesn't
change the actual location of the host bridges, or any other behaviour.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c  | 34 ++
 hw/ppc/spapr_pci.c  | 22 --
 include/hw/pci-host/spapr.h | 11 +--
 include/hw/ppc/spapr.h  |  4 
 4 files changed, 47 insertions(+), 24 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 03e3803..f6e9c2a 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2370,6 +2370,39 @@ static HotpluggableCPUList 
*spapr_query_hotpluggable_cpus(MachineState *machine)
 return head;
 }
 
+static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
+uint64_t *buid, hwaddr *pio, hwaddr *pio_size,
+hwaddr *mmio, hwaddr *mmio_size,
+unsigned n_dma, uint32_t *liobns, Error **errp)
+{
+const uint64_t base_buid = 0x8002000ULL;
+const hwaddr phb0_base = 0x100ULL; /* 1 TiB */
+const hwaddr phb_spacing = 0x10ULL; /* 64 GiB */
+const hwaddr mmio_offset = 0xa000; /* 2 GiB + 512 MiB */
+const hwaddr pio_offset = 0x8000; /* 2 GiB */
+const uint32_t max_index = 255;
+
+hwaddr phb_base;
+int i;
+
+if (index > max_index) {
+error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
+   max_index);
+return;
+}
+
+*buid = base_buid + index;
+for (i = 0; i < n_dma; ++i) {
+liobns[i] = SPAPR_PCI_LIOBN(index, i);
+}
+
+phb_base = phb0_base + index * phb_spacing;
+*pio = phb_base + pio_offset;
+*pio_size = SPAPR_PCI_IO_WIN_SIZE;
+*mmio = phb_base + mmio_offset;
+*mmio_size = SPAPR_PCI_MMIO_WIN_SIZE;
+}
+
 static void spapr_machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -2406,6 +2439,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
 fwc->get_dev_path = spapr_get_fw_dev_path;
 nc->nmi_monitor_handler = spapr_nmi;
+smc->phb_placement = spapr_phb_placement;
 }
 
 static const TypeInfo spapr_machine_info = {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 4f00865..c0fc964 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1311,7 +1311,8 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
 
 if (sphb->index != (uint32_t)-1) {
-hwaddr windows_base;
+sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+Error *local_err = NULL;
 
 if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != 
(uint32_t)-1)
 || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2)
@@ -1322,21 +1323,14 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-if (sphb->index > SPAPR_PCI_MAX_INDEX) {
-error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
-   SPAPR_PCI_MAX_INDEX);
+smc->phb_placement(spapr, sphb->index,
+   >buid, >io_win_addr, >io_win_size,
+   >mem_win_addr, >mem_win_size,
+   windows_supported, sphb->dma_liobn, _err);
+if (local_err) {
+error_propagate(errp, local_err);
 return;
 }
-
-sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index;
-for (i = 0; i < 

[Qemu-devel] [PATCH] 9pfs: fix integer overflow issue in xattr read/write

2016-10-11 Thread Li Qiang
From: Li Qiang 

In the 9pfs xattr read/write functions, signed/unsigned and 32/64-bit
integers are mixed.  This can cause out-of-bounds read/write issues.  This
patch fixes that.

Signed-off-by: Li Qiang 
---
 hw/9pfs/9p.c | 29 +
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index e4040dc..8b50bfb 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -1642,21 +1642,21 @@ static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, 
V9fsFidState *fidp,
 {
 ssize_t err;
 size_t offset = 7;
-int read_count;
-int64_t xattr_len;
+uint64_t read_count;
+uint64_t xattr_len;
 V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
 VirtQueueElement *elem = v->elems[pdu->idx];
 
 xattr_len = fidp->fs.xattr.len;
+if (xattr_len < off) {
+read_count = 0;
+goto over_read_count;
+}
 read_count = xattr_len - off;
 if (read_count > max_count) {
 read_count = max_count;
-} else if (read_count < 0) {
-/*
- * read beyond XATTR value
- */
-read_count = 0;
 }
+over_read_count:
 err = pdu_marshal(pdu, offset, "d", read_count);
 if (err < 0) {
 return err;
@@ -1982,22 +1982,19 @@ static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, 
V9fsFidState *fidp,
 {
 int i, to_copy;
 ssize_t err = 0;
-int write_count;
-int64_t xattr_len;
+uint64_t write_count;
+uint64_t xattr_len;
 size_t offset = 7;
 
 
 xattr_len = fidp->fs.xattr.len;
+if (xattr_len < off) {
+err = -ENOSPC;
+goto out;
+}
 write_count = xattr_len - off;
 if (write_count > count) {
 write_count = count;
-} else if (write_count < 0) {
-/*
- * write beyond XATTR value len specified in
- * xattrcreate
- */
-err = -ENOSPC;
-goto out;
 }
 err = pdu_marshal(pdu, offset, "d", write_count);
 if (err < 0) {
-- 
1.8.3.1




Re: [Qemu-devel] [PATCH RFC] vfio-pci: put device in INTx disable state in pre_reset

2016-10-11 Thread Cao jin



On 10/12/2016 10:25 AM, Alex Williamson wrote:

On Mon, 10 Oct 2016 17:12:43 +0800
Cao jin  wrote:


Current code cleared the PCI_COMMAND_INTX_DISABLE, which indicates
device/function could asserts its INTx# signal.

PCI local spec says:
A value of 0 enables the assertion of its INTx# signal.
A value of 1 disables the assertion of its INTx# signal.


The PCI spec also says that this bit's state is zero after reset and
we're about to perform a reset, so we expect it to be zero after
reset.  I believe this is the reason I set it this way.  If we want to
set it, we should OR it in, not AND it.  Are you actually seeing any
issues with the current behavior or was this a code inspection
discovery?  Thanks,

Alex



Just a code inspection discovery. I understand that the bit is 0 after 
reset. In pre-reset, from what I understand, we disable interrupts 
first, so I *guess* this bit should maybe indicate the current 
state (device can't assert to trigger INTx).


If this bit is set to 1 in pre-reset, then cleared to 0 in post-reset, 
it will be more logical to me. But just clearing it to 0 in pre-reset seems 
not quite wrong, because we eventually want it to be 0.


And yes, I made a mistake, we should OR it if want to set it.
--
Yours Sincerely,

Cao jin



Signed-off-by: Cao jin 
---
I guess it is a mistake, clearing the bit to enable INTx violate
the intention of vfio_disable_interrupts above.

  hw/vfio/pci.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a5a620a..cce3024 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1898,8 +1898,8 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
   * Also put INTx Disable in known state.
   */
  cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2);
-cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
- PCI_COMMAND_INTX_DISABLE);
+cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER) |
+ PCI_COMMAND_INTX_DISABLE;
  vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
  }













Re: [Qemu-devel] [dpdk-dev] [PATCH 1/2] vhost: enable any layout feature

2016-10-11 Thread Yuanhan Liu
On Tue, Oct 11, 2016 at 02:57:49PM +0800, Yuanhan Liu wrote:
> > > > > > There was an example: the vhost enqueue optmization patchset from
> > > > > > Zhihong [0] uses memset, and it introduces more than 15% drop (IIRC)

Though it doesn't matter now, I verified it yesterday (with and
without memset), and the drop could be up to 30+%.

This is to let you know that it could behave badly if memset is not
inlined.

> > > > > > on my Ivybridge server: it has no such issue on his server though.
> > > > > >
> > > > > > [0]: http://dpdk.org/ml/archives/dev/2016-August/045272.html
> > > > > >
> > > > > > --yliu
> > > > >
> > > > > I'd say that's weird. what's your config? any chance you
> > > > > are using an old compiler?
> > > > 
> > > > Not really, it's gcc 5.3.1. Maybe Zhihong could explain more. IIRC,
> > > > he said the memset is not well optimized for Ivybridge server.
> > > 
> > > The dst is remote in that case. It's fine on Haswell but has complication
> > > in Ivy Bridge which (wasn't supposed to but) causes serious frontend 
> > > issue.
> > > 
> > > I don't think gcc inlined it there. I'm using fc24 gcc 6.1.1.
> > 
> > 
> > So try something like this then:
> 
> Yes, I saw memset is inlined when this diff is applied.

I have another concern though: it's a trick that could let gcc do the inlining,
but I am not quite sure whether that's true with other compilers (i.e. clang,
icc, or even, older gcc).

For this case, I think I still prefer some trick like
*(struct ..*) = {0, }

Or even, we could introduce rte_memset(). IIRC, that has been
proposed somehow before?

--yliu



Re: [Qemu-devel] [PATCH V2] colo-proxy: fix memory leak

2016-10-11 Thread Zhang Chen



On 10/11/2016 10:32 PM, Eric Blake wrote:

On 10/11/2016 02:33 AM, Zhang Chen wrote:

Fix memory leak in colo-compare.c and filter-rewriter.c
Report by Coverity.

This part is fine.


v2:
   - use traces instead of fprintf in colo-compare.c

v1:
   - initial patch

...but this part should live...


Signed-off-by: Zhang Chen 
---

...here, after the --- separator, where 'git am' will ignore it.  It is
useful information to list readers, but will make no sense in qemu.git
history a year from now (we don't care how many versions it took to get
to the version that was committed).  The maintainer can fix it, so by
itself, that's not a reason to send a v3.


OK, I will remove the version info.




  net/colo-compare.c| 11 +++
  net/filter-rewriter.c | 17 +
  trace-events  |  1 +
  3 files changed, 9 insertions(+), 20 deletions(-)

@@ -219,11 +218,9 @@ static int colo_packet_compare_tcp(Packet *spkt, Packet 
*ppkt)
  (spkt->size - ETH_HLEN));
  
  if (res != 0 && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {

-sdebug = strdup(inet_ntoa(ppkt->ip->ip_src));
-ddebug = strdup(inet_ntoa(ppkt->ip->ip_dst));
-fprintf(stderr, "%s: src/dst: %s/%s p: seq/ack=%u/%u"
-" s: seq/ack=%u/%u res=%d flags=%x/%x\n",
-__func__, sdebug, ddebug,
+trace_colo_compare_pkt_info(__func__,

net/filter-rewriter.c is the only file that currently uses
trace_...(__func__), that's because the trace mechanism itself already
expands the trace_* call to something with enough context that manually
adding __func__ just gives redundant information.  I'd rather see
__func__ removed from the trace call.


I will remove the __func__ in next version.




+inet_ntoa(ppkt->ip->ip_src),
+inet_ntoa(ppkt->ip->ip_dst),
  (unsigned int)ntohl(ptcp->th_seq),
  (unsigned int)ntohl(ptcp->th_ack),
  (unsigned int)ntohl(stcp->th_seq),

Are these casts still needed?


will fix it.


+++ b/trace-events
@@ -149,6 +149,7 @@ colo_compare_icmp_miscompare(const char *sta, int size) ": %s = 
%d"
  colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char 
*stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, 
ip_src = %s, ip_dst = %s"
  colo_old_packet_check_found(int64_t old_time) "%" PRId64
  colo_compare_miscompare(void) ""
+colo_compare_pkt_info(const char *func, const char *src, const char *dst, uint32_t pseq, 
uint32_t pack, uint32_t sseq, uint32_t sack, int res, uint32_t pflag, uint32_t sflag) 
"%s: src/dst: %s/%s p: seq/ack=%u/%u   s: seq/ack=%u/%u res=%d flags=%x/%x\n"

Again, the 'const char *func' portion is not needed.

Looking forward to v3.


OK, I will send v3 later

Thanks
Zhang Chen


--
Thanks
zhangchen






Re: [Qemu-devel] [PATCH RFC] vfio-pci: put device in INTx disable state in pre_reset

2016-10-11 Thread Alex Williamson
On Mon, 10 Oct 2016 17:12:43 +0800
Cao jin  wrote:

> Current code cleared the PCI_COMMAND_INTX_DISABLE, which indicates
> device/function could asserts its INTx# signal.
> 
> PCI local spec says:
> A value of 0 enables the assertion of its INTx# signal.
> A value of 1 disables the assertion of its INTx# signal.

The PCI spec also says that this bit's state is zero after reset and
we're about to perform a reset, so we expect it to be zero after
reset.  I believe this is the reason I set it this way.  If we want to
set it, we should OR it in, not AND it.  Are you actually seeing any
issues with the current behavior or was this a code inspection
discovery?  Thanks,

Alex

 
> Signed-off-by: Cao jin 
> ---
> I guess it is a mistake, clearing the bit to enable INTx violate
> the intention of vfio_disable_interrupts above.
> 
>  hw/vfio/pci.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index a5a620a..cce3024 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1898,8 +1898,8 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
>   * Also put INTx Disable in known state.
>   */
>  cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2);
> -cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
> - PCI_COMMAND_INTX_DISABLE);
> +cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER) |
> + PCI_COMMAND_INTX_DISABLE;
>  vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
>  }
>  




[Qemu-devel] [PATCH] mmap-alloc: check before use for ptr pointer

2016-10-11 Thread Gonglei
If ptr mmap failed, we don't need to do a superfluous
calculation for offset variable by ptr (MAP_FAILED).

Signed-off-by: Gonglei 
---
 util/mmap-alloc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 5a85aa3..577862b 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -61,13 +61,15 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool 
shared)
 #else
 void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 #endif
-size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
+size_t offset;
 void *ptr1;
 
 if (ptr == MAP_FAILED) {
 return MAP_FAILED;
 }
 
+offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
+
 /* Make sure align is a power of 2 */
 assert(!(align & (align - 1)));
 /* Always align to host page size */
-- 
1.8.3.1





Re: [Qemu-devel] [PATCH v8 4/6] docs: Add Documentation for Mediated devices

2016-10-11 Thread Tian, Kevin
> From: Kirti Wankhede [mailto:kwankh...@nvidia.com]
> Sent: Wednesday, October 12, 2016 4:45 AM
> >> +* mdev_supported_types:
> >> +List of current supported mediated device types and its details are 
> >> added
> >> +in this directory in following format:
> >> +
> >> +|- 
> >> +|--- Vendor-specific-attributes [optional]
> >> +|--- mdev_supported_types
> >> +| |--- 
> >> +| |   |--- create
> >> +| |   |--- name
> >> +| |   |--- available_instances
> >> +| |   |--- description /class
> >> +| |   |--- [devices]
> >> +| |--- 
> >> +| |   |--- create
> >> +| |   |--- name
> >> +| |   |--- available_instances
> >> +| |   |--- description /class
> >> +| |   |--- [devices]
> >> +| |--- 
> >> +|  |--- create
> >> +|  |--- name
> >> +|  |--- available_instances
> >> +|  |--- description /class
> >> +|  |--- [devices]
> >> +
> >> +[TBD : description or class is yet to be decided. This will change.]
> >
> > I thought that in previous discussions we had agreed to drop
> > the <type id> concept and use the name as the unique identifier.
> > When reporting these types in libvirt we won't want to report
> > the type id values - we'll want the name strings to be unique.
> >
> 
> The 'name' might not be unique but type_id will be. For example that Neo
> pointed out in earlier discussion, virtual devices can come from two
> different physical devices, end user would be presented with what they
> had selected but there will be internal implementation differences. In
> that case 'type_id' will be unique.
> 

Hi, Kirti, my understanding is that Neo agreed to use a unique type
string (if you still call it <type_id>), and then there is no need for an additional
'name' field which can be put inside 'description' field. See below quote:


> I think your discovery only means that for your vendor driver, the name
> will be "11" (as a string).  Perhaps you'd like some sort of vendor
> provided description within each type, but I am not in favor of having
> an arbitrary integer value imply something specific within the sysfs
> interface.  IOW, the NVIDIA vendor driver should be able to create:
> 
> 11
> ├── create
> ├── description
> ├── etc
> └── resolution
> 
> While Intel might create:
> 
> Skylake-vGPU
> ├── create
> ├── description
> ├── etc
> └── resolution
> 
> Maybe "description" is optional for vendors that use useful names?
> Thanks,


> I think we should be able to have a unique vendor type string instead of an
> arbitrary integer value there as long as we are allowed to have a description
> field that can be used to show to the end user as "name / label". 

Thanks
Kevin


Re: [Qemu-devel] [PATCH 00/16] target-sparc improvements

2016-10-11 Thread Richard Henderson

On 10/11/2016 04:42 PM, Mark Cave-Ayland wrote:

I'm fairly sure that I've tested an earlier version of this patchset,
however just to confirm is it just that you want a Tested-by from me of
this branch based upon the v6 atomics patch? If so I can run it against
all of my SPARC/SPARC64 test images over the next day or so.


I remember having posted bits and pieces that are in here, but not the whole 
thing at once.  Please do test wrt the atomics.



r~



Re: [Qemu-devel] [PATCH v3 0/7] Runtime pagesize computation

2016-10-11 Thread Richard Henderson

On 10/11/2016 03:18 PM, Peter Maydell wrote:

On 11 October 2016 at 12:20, Richard Henderson  wrote:

On 10/11/2016 12:08 PM, Peter Maydell wrote:

I would ideally have liked to finalize things much later, but
this is in practice hugely difficult because so many things
(in particular all the address space/memory system code)
assume the target page size is known.



Unfortunate.  I suppose that 4k is still better than 1k, but
I was hoping to get 16k or 64k (or higher) when the OS is
configured to use such.  I.e. totally dynamically configurable
upon write to the appropriate cpu register.


I think that would run into problems with migration:
the migration stream all works in guest-pages of ram and
a mismatch means migration doesn't work.


Perhaps migration should use definitions based off of TARGET_PAGE_BITS_MIN? 
Dunno how big of a job that would be...



The trouble is that all the data structures work in terms
of page sizes (even though we support sub-page allocations
those are still done by carving up a page-size chunk).
It could probably be done but it looked like a gargantuan
task so I decided this was a better compromise.


Fair enough.  This is still an improvement for an interesting guest.


r~




Re: [Qemu-devel] [PATCH v2] tests: Run qtest cases in parallel

2016-10-11 Thread Fam Zheng
On Fri, 09/30 13:25, Marc-André Lureau wrote:
> In any case
> Reviewed-by: Marc-André Lureau 

Paolo, are you happy with taking this patch?

Fam



Re: [Qemu-devel] [PATCH] qcow2: Support BDRV_REQ_MAY_UNMAP

2016-10-11 Thread Fam Zheng
On Wed, 09/28 15:04, Fam Zheng wrote:
> Handling this is similar to what is done to the L2 entry in the case of
> compressed clusters.

Kevin, Max, is there anything else I need to do before this patch can be
applied?

Fam



Re: [Qemu-devel] [PATCH v3 0/5] generic docker run patches

2016-10-11 Thread Fam Zheng
On Tue, 10/11 17:16, Alex Bennée wrote:
> Hi Fam,
> 
> I've re-based the series for the generic run target. The aim being to
> allow a developer to run tests against any generic docker target even
> if it is not in the list of approved targets:
> 
>   make docker-run TEST=test-quick IMAGE=debian:arm64 \
> EXECUTABLE=./aarch64-linux-user/qemu-aarch64 J=9 SHOW_ENV=1
> 
> Since last post:
>   - fix squashing of TARGET_LIST
>   - add new patch to remove verification (could be squashed)

Queued for next pull, thanks!

Fam

> 
> Alex Bennée (5):
>   tests/docker: add travis dockerfile
>   tests/docker: test-build script
>   tests/docker: make test-mingw honour TARGET_LIST
>   tests/docker/Makefile.include: add a generic docker-run target
>   tests/docker/Makefile.include: remove verification targets
> 
>  tests/docker/Makefile.include  | 61 
> +-
>  tests/docker/dockerfiles/travis.docker |  6 
>  tests/docker/test-build| 20 +++
>  tests/docker/test-mingw|  3 +-
>  4 files changed, 66 insertions(+), 24 deletions(-)
>  create mode 100644 tests/docker/dockerfiles/travis.docker
>  create mode 100755 tests/docker/test-build
> 
> -- 
> 2.9.3
> 



Re: [Qemu-devel] [PATCH v4] timer: a9gtimer: remove loop to auto-increment comparator

2016-10-11 Thread Fam Zheng
On Tue, 10/11 22:44, P J P wrote:
> +-- On Mon, 10 Oct 2016, no-re...@ec2-52-6-146-230.compute-1.amazonaws.com 
> wrote --+
> | Your series failed automatic build test. Please find the testing commands 
> and
> | their output below. If you have docker installed, you can probably 
> reproduce it
> | locally.
> 
>   I tried to build it locally(without docker), cloned submodule 'dtc' and 
> used 
> the same './configure' options as in the test. It built okay, no error.
> 
> | === TEST SCRIPT BEGIN ===
> | #!/bin/bash
> | set -e
> | git submodule update --init dtc
> | # Let docker tests dump environment info
> | export SHOW_ENV=1
> | make J=8 docker-test-quick@centos6
> | make J=8 docker-test-mingw@fedora
> | === TEST SCRIPT END ===
> | 
> |   CC  hw/timer/arm_timer.o
> |   CC  hw/timer/arm_mptimer.o
> |   CC  hw/timer/a9gtimer.o
> 
>   It seems to build okay.
>  
> | (gtester:6328): GLib-WARNING **: Failed to execute test binary: 
> tests/test-qmp-output-visitor: Failed to fork (Resource temporarily 
> unavailable)
> | /bin/sh: fork: retry: Resource temporarily unavailable
> | make: vfork: Resource temporarily unavailable
> 
>   This seems like a system issue, not related to the 'a9gtimer.c' patch.

I think you are right, thanks for confirming!

Fam



Re: [Qemu-devel] [PATCH v3 0/7] Runtime pagesize computation

2016-10-11 Thread no-reply
Hi,

Your series seems to have some coding style problems. See output below for
more information:

Message-id: 1476205699-28857-1-git-send-email-peter.mayd...@linaro.org
Subject: [Qemu-devel] [PATCH v3 0/7] Runtime pagesize computation
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

# Useful git options
git config --local diff.renamelimit 0
git config --local diff.renames True

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git show --no-patch --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
5a1d89b hw/arm/virt: Set minimum_page_bits to 12
c3c85d5 target-arm: Make page size a runtime setting
b43a8b0 migration/savevm.c: migrate non-default page size
994a37f cpu: Support a target CPU having a variable page size
570aff0 translate-all.c: Compute L1 page table properties at runtime
9891732 exec.c: Remove static allocation of sub_section of sub_page
60c7e4c migration: Remove static allocation of xzblre cache buffer

=== OUTPUT BEGIN ===
Checking PATCH 1/7: migration: Remove static allocation of xzblre cache 
buffer...
Checking PATCH 2/7: exec.c: Remove static allocation of sub_section of 
sub_page...
Checking PATCH 3/7: translate-all.c: Compute L1 page table properties at 
runtime...
Checking PATCH 4/7: cpu: Support a target CPU having a variable page size...
Checking PATCH 5/7: migration/savevm.c: migrate non-default page size...
ERROR: spaces required around that '*' (ctx:VxV)
#96: FILE: migration/savevm.c:359:
+.subsections = (const VMStateDescription*[]) {
 ^

total: 1 errors, 0 warnings, 79 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 6/7: target-arm: Make page size a runtime setting...
Checking PATCH 7/7: hw/arm/virt: Set minimum_page_bits to 12...
=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@freelists.org

Re: [Qemu-devel] [PATCH] char: serial: check divider value against baud base

2016-10-11 Thread no-reply
Hi,

Your series seems to have some coding style problems. See output below for
more information:

Subject: [Qemu-devel] [PATCH] char: serial: check divider value against baud 
base
Message-id: 1476203260-5290-1-git-send-email-ppan...@redhat.com
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

# Useful git options
git config --local diff.renamelimit 0
git config --local diff.renames True

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git show --no-patch --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
7aea007 char: serial: check divider value against baud base

=== OUTPUT BEGIN ===
Checking PATCH 1/1: char: serial: check divider value against baud base...
ERROR: braces {} are necessary for all arms of this statement
#25: FILE: hw/char/serial.c:156:
+if (s->divider == 0 || s->divider > s->baudbase)
[...]

total: 1 errors, 0 warnings, 8 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@freelists.org

Re: [Qemu-devel] Potential Bug in vIOMMU which may result in memory wasting

2016-10-11 Thread no-reply
Hi,

Your series seems to have some coding style problems. See output below for
more information:

Message-id: 
a2975661238fb949b60364ef0f2c257436dbd...@shsmsx102.ccr.corp.intel.com
Subject: [Qemu-devel] Potential Bug in vIOMMU which may result in memory wasting
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

# Useful git options
git config --local diff.renamelimit 0
git config --local diff.renames True

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git show --no-patch --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
21c104d Potential Bug in vIOMMU which may result in memory wasting

=== OUTPUT BEGIN ===
Checking PATCH 1/1: Potential Bug in vIOMMU which may result in memory 
wasting...
ERROR: do not use C99 // comments
#166: FILE: hw/i386/intel_iommu.c:1990:
+   // exit(1);//Yi: comment out for test purpose

ERROR: do not use C99 // comments
#174: FILE: hw/i386/intel_iommu.c:2325:
+//Yi: add for debug

ERROR: __func__ should be used instead of gcc specific __FUNCTION__
#181: FILE: hw/i386/intel_iommu.c:2332:
+__FUNCTION__,

ERROR: do not use C99 // comments
#210: FILE: hw/i386/intel_iommu.c:2373:
+  //Yi: code snippet for debug, dump the vtd_as_by_buspt

ERROR: do not use C99 // comments
#211: FILE: hw/i386/intel_iommu.c:2374:
+  // r to see if all the allocated vtd_bus are still there

ERROR: suspect code indent for conditional statements (6, 9)
#214: FILE: hw/i386/intel_iommu.c:2377:
+  while (g_hash_table_iter_next (, NULL, (void**)_bus)) {
+ if (pci_bus_num(vtd_bus->bus) == pci_bus_num(bus)) {

ERROR: "(foo**)" should be "(foo **)"
#214: FILE: hw/i386/intel_iommu.c:2377:
+  while (g_hash_table_iter_next (, NULL, (void**)_bus)) {

ERROR: space prohibited between function name and open parenthesis '('
#214: FILE: hw/i386/intel_iommu.c:2377:
+  while (g_hash_table_iter_next (, NULL, (void**)_bus)) {

ERROR: suspect code indent for conditional statements (9, 13)
#215: FILE: hw/i386/intel_iommu.c:2378:
+ if (pci_bus_num(vtd_bus->bus) == pci_bus_num(bus)) {
+ printf("  vtd_bus in s->vtd_as_by_busptr: 0x%llx\n",

ERROR: do not use C99 // comments
#220: FILE: hw/i386/intel_iommu.c:2383:
+  // look up hash table again see if the result is the same with the

ERROR: do not use C99 // comments
#221: FILE: hw/i386/intel_iommu.c:2384:
+ //  beginning one.

ERROR: do not use C99 // comments
#240: FILE: hw/pci/pcie.c:686:
+//assert(prev >= PCI_CONFIG_SPACE_SIZE);

ERROR: do not use C99 // comments
#253: FILE: hw/vfio/common.c:460:
+//memory_region_iommu_replay(giommu->iommu, >n, false);

total: 13 errors, 0 warnings, 83 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@freelists.org

Re: [Qemu-devel] [Questions] NBD issue or CoMutex->holder issue?

2016-10-11 Thread Changlong Xie

On 10/11/2016 06:47 PM, Paolo Bonzini wrote:

the free_sema->queue head, so set free_sema->holder as
>relevant coroutine.

NBD is using the CoMutex in a way that wasn't anticipated.  The simplest
fix is to change it to CoQueue, which is like a condition variable.
Instead of locking if in_flight >= MAX_NBD_REQUESTS - 1, wait on the
queue while in_flight == MAX_NBD_REQUESTS.  Instead of unlocking, use
qemu_co_queue_next to wake up one request.



Thanks for your explanation! will send out a patch later.


Thanks
-Xie


Thanks for the report!

Paolo


>For example if there are N(N=26 and MAX_NBD_REQUESTS=16) nbd write
>requests, so we'll invoke nbd_client_co_pwritev 26 times.
>time request No   Actions
>1 1   in_flight=1, Coroutine=C1
>2 2   in_flight=2, Coroutine=C2






Re: [Qemu-devel] [PATCH v2 2/2] block/replication: Clarify 'top-id' parameter usage

2016-10-11 Thread Changlong Xie

On 10/11/2016 10:54 PM, Eric Blake wrote:

The replication driver only supports the 'top-id' parameter for the
secondary side; it must not be supplied for the primary side.


Will apply in next version.

Thanks
-Xie





Re: [Qemu-devel] [PATCH v2 1/2] block/replication: prefect the logic to acquire 'top_id'

2016-10-11 Thread Changlong Xie

On 10/11/2016 10:52 PM, Eric Blake wrote:

On 10/11/2016 05:46 AM, Changlong Xie wrote:

Only g_strdup(top_id) if 'top_id' is not NULL, although there
is no memory leak here

Signed-off-by: Changlong Xie 
---
  block/replication.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/replication.c b/block/replication.c
index 3bd1cf1..5b432d9 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -104,11 +104,11 @@ static int replication_open(BlockDriverState *bs, QDict 
*options,
  } else if (!strcmp(mode, "secondary")) {
  s->mode = REPLICATION_MODE_SECONDARY;
  top_id = qemu_opt_get(opts, REPLICATION_TOP_ID);
-s->top_id = g_strdup(top_id);


g_strdup(NULL) is safe; it returns NULL in that case.


Yes, that's why i said 'there is no memory leak here' in the commit 
message.





-if (!s->top_id) {
+if (!top_id) {
  error_setg(_err, "Missing the option top-id");
  goto fail;
  }
+s->top_id = g_strdup(top_id);


I see no point to this patch, rather than churn.


It just reduces one execution path. Maybe I'm too academic :)
Will remove it in the next series.

Thanks
-Xie











Re: [Qemu-devel] [PATCH 0/3] Improvements for the boot-sector tester

2016-10-11 Thread David Gibson
On Wed, Oct 12, 2016 at 12:05:24AM +0300, Michael S. Tsirkin wrote:
> On Tue, Oct 11, 2016 at 05:19:34PM +0200, Thomas Huth wrote:
> > Here are two patches that try to improve the situation with the
> > slow pxe-test on ppc64 a little bit, and one patch that fixes
> > a potential race condition between tests that run in parallel
> > by using a random filename instead of an invariant one.
> 
> 
> Reviewed-by: Michael S. Tsirkin 
> 
> You can go ahead and merge as I'm offline tomorrow.

Done.

> 
> > Thomas Huth (3):
> >   tests/boot-sector: Use minimum length for the Forth boot script
> >   tests/boot-sector: Use mkstemp() to create a unique file name
> >   tests/boot-sector: Increase time-out to 90 seconds
> > 
> >  tests/bios-tables-test.c |  2 +-
> >  tests/boot-sector.c  | 25 -
> >  tests/boot-sector.h  |  4 ++--
> >  tests/pxe-test.c |  2 +-
> >  4 files changed, 20 insertions(+), 13 deletions(-)
> > 
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [QEMU PATCH v5 1/6] migration: alternative way to set instance_id in SaveStateEntry

2016-10-11 Thread David Gibson
On Tue, Oct 11, 2016 at 11:17:35AM -0500, Michael Roth wrote:
> Quoting David Gibson (2016-10-10 00:31:20)
> > On Fri, Oct 07, 2016 at 09:07:49AM +0100, Dr. David Alan Gilbert wrote:
> > > * David Gibson (da...@gibson.dropbear.id.au) wrote:
> > > > On Wed, Oct 05, 2016 at 09:44:57AM -0700, Jianjun Duan wrote:
> > > > > Please see comments below:
> > > > > 
> > > > > On 10/05/2016 03:12 AM, Dr. David Alan Gilbert wrote:
> > > > > > * Jianjun Duan (du...@linux.vnet.ibm.com) wrote:
> > > > > >> In QOM(QEMU Object Model) migrated objects are identified with 
> > > > > >> instance_id
> > > > > >> which is calculated automatically using their path in the QOM 
> > > > > >> composition
> > > > > >> tree. For some objects, this path could change from source to 
> > > > > >> target in
> > > > > >> migration. To migrate such objects, we need to make sure the 
> > > > > >> instance_id does
> > > > > >> not change from source to target. We add a hook in DeviceClass to 
> > > > > >> do customized
> > > > > >> instance_id calculation in such cases.
> > > > > > 
> > > > > > Can you explain a bit about why the path changes from source to 
> > > > > > destination;
> > > > > > the path here should be a feature of the guest state not the host, 
> > > > > > and so I
> > > > > > don't understand why it changes.
> > > > > Please see the discussion with David in the previous versions:
> > > > > http://lists.nongnu.org/archive/html/qemu-ppc/2016-06/msg00062.html
> > > > 
> > > > Um.. your description above really isn't an accurate summary of that
> > > > discussion.
> > > > 
> > > > The point is not that the qom path will vary from source to
> > > > destination for some arbitrary reason, but rather that we anticipate
> > > > future changes in the QOM structure.  Specifically we're considering
> > > > eliminating the DRC objects, and folding their (limited) state into an
> > > > array in the parent object (either the machine or a PCI host bridge).
> > > > 
> > > > That would change the qom paths, and hence the auto-generated instance
> > > > ids, which would break migration between qemu versions before and
> > > > after the restructure.
> > > > 
> > > > I'm not sure that changing the instance ids is enough though, anyway,
> > > > since we're talking about eliminating the object entirely, the
> > > > class/type information in the migration stream also wouldn't match.
> > > > 
> > > > Dave, if you have ideas on how to deal with that, I'd love to hear
> > > > them
> > > 
> > > Eliminating the object entirely would be tricky to deal with;
> > > allowing the structure to change would work if instead of a custom 
> > > instance_id
> > > you had a custom idstr.
> > 
> > Sorry, two misunderstandings here.
> > 
> >   * When I said "structure" I meant in the high-level sense of what
> > objects exist and are responsible for what things, not in the
> > 'struct WhateverState' sense.
> > 
> >   * In fact right now eliminating the objects would be ok, since they
> > have no migration state (which causes the problems this series is
> > trying to address).  But applying this series which adds migration
> > state would make it difficult to eliminate the objects in future.
> > That's an awkward constraint given that we've already got some
> > hints that these objects are not a good idea.
> > 
> > > However, the question then becomes why is the structure changing so much;
> > > ideally things in the migration stream should represent some tangible 
> > > object
> > > that corresponds to something real, but (again ideally) the contents
> > > of the stream should reflect the state of those objects not the current
> > > implementation - so if you want to change the implementation the stream
> > > doesn't change.  Is it your implementation, or the understanding of what
> > > the objects actually represent that's in flux?
> > 
> > So, the objects in question are DRCs - "Dynamic Re-configuration
> > Connector"; silly IBM talk for "a port into which something can be
> > hotplugged", basically.  These aren't "real" devices, but rather a
> > firmware/hypervisor abstraction which are used to describe a hotplug
> > point.  Each DRC allows either one CPU core, one PCI device, or one
> > LMB (256MiB chunk of RAM) to be hotplugged (or removed).  The PCI DRCs
> > are "owned" by the PCI host bridge to which the device would be
> > connected, the CPU and memory DRCs are owned by the machine.
> > 
> > The state variables which Jianjun Duan is adding to migration are
> > values defined in the PAPR (hypervisor interface) spec, and so are
> > tangible enough to be sensible to migrate.  At the moment, each LMB is
> > represented by a discrete QOM object, but I've been thinking for a
> > while that this may be a mistake.  In particular it's a problem for
> > the LMB DRCs - because each LMB is only 256MiB of memory, we end up
> > with thousands, maybe tens of thousands of DRC objects on a guest with
> > large maxmem (even if initial memory 

Re: [Qemu-devel] [PATCH v8 6/6] Add common functions for SET_IRQS and GET_REGION_INFO ioctls

2016-10-11 Thread Alex Williamson
On Tue, 11 Oct 2016 01:58:37 +0530
Kirti Wankhede  wrote:

> Add common functions for SET_IRQS and to add capability buffer for
> GET_REGION_INFO ioctls

Clearly should be two (or more) separate patches since SET_IRQS and
REGION_INFO are unrelated changes.  Each of the two capabilities handled
could possibly be separate patches as well.

 
> Signed-off-by: Kirti Wankhede 
> Signed-off-by: Neo Jia 
> Change-Id: Id9e976a2c08b9b2b37da77dac4365ae8f6024b4a
> ---
>  drivers/vfio/pci/vfio_pci.c | 103 +++
>  drivers/vfio/vfio.c | 116 
> 
>  include/linux/vfio.h|   7 +++
>  3 files changed, 162 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index 188b1ff03f5f..f312cbb0eebc 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -478,12 +478,12 @@ static int vfio_pci_for_each_slot_or_bus(struct pci_dev 
> *pdev,
>  }
>  
>  static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
> + struct vfio_region_info *info,
>   struct vfio_info_cap *caps)
>  {
> - struct vfio_info_cap_header *header;
>   struct vfio_region_info_cap_sparse_mmap *sparse;
>   size_t end, size;
> - int nr_areas = 2, i = 0;
> + int nr_areas = 2, i = 0, ret;
>  
>   end = pci_resource_len(vdev->pdev, vdev->msix_bar);
>  
> @@ -494,13 +494,10 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device 
> *vdev,
>  
>   size = sizeof(*sparse) + (nr_areas * sizeof(*sparse->areas));
>  
> - header = vfio_info_cap_add(caps, size,
> -VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
> - if (IS_ERR(header))
> - return PTR_ERR(header);
> + sparse = kzalloc(size, GFP_KERNEL);
> + if (!sparse)
> + return -ENOMEM;
>  
> - sparse = container_of(header,
> -   struct vfio_region_info_cap_sparse_mmap, header);
>   sparse->nr_areas = nr_areas;
>  
>   if (vdev->msix_offset & PAGE_MASK) {
> @@ -516,24 +513,14 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device 
> *vdev,
>   i++;
>   }
>  
> - return 0;
> -}
> -
> -static int region_type_cap(struct vfio_pci_device *vdev,
> -struct vfio_info_cap *caps,
> -unsigned int type, unsigned int subtype)
> -{
> - struct vfio_info_cap_header *header;
> - struct vfio_region_info_cap_type *cap;
> + info->flags |= VFIO_REGION_INFO_FLAG_CAPS;
>  
> - header = vfio_info_cap_add(caps, sizeof(*cap),
> -VFIO_REGION_INFO_CAP_TYPE, 1);
> - if (IS_ERR(header))
> - return PTR_ERR(header);
> + ret = vfio_info_add_capability(info, caps,
> +   VFIO_REGION_INFO_CAP_SPARSE_MMAP, sparse);
> + kfree(sparse);
>  
> - cap = container_of(header, struct vfio_region_info_cap_type, header);
> - cap->type = type;
> - cap->subtype = subtype;
> + if (ret)
> + return ret;
>  
>   return 0;

Just: return ret;

>  }
> @@ -628,7 +615,8 @@ static long vfio_pci_ioctl(void *device_data,
>   IORESOURCE_MEM && info.size >= PAGE_SIZE) {
>   info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
>   if (info.index == vdev->msix_bar) {
> - ret = msix_sparse_mmap_cap(vdev, );
> + ret = msix_sparse_mmap_cap(vdev, ,
> +);
>   if (ret)
>   return ret;
>   }
> @@ -676,6 +664,9 @@ static long vfio_pci_ioctl(void *device_data,
>  
>   break;
>   default:
> + {
> + struct vfio_region_info_cap_type cap_type;
> +
>   if (info.index >=
>   VFIO_PCI_NUM_REGIONS + vdev->num_regions)
>   return -EINVAL;
> @@ -684,29 +675,26 @@ static long vfio_pci_ioctl(void *device_data,
>  
>   info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
>   info.size = vdev->region[i].size;
> - info.flags = vdev->region[i].flags;
> + info.flags = vdev->region[i].flags |
> +  VFIO_REGION_INFO_FLAG_CAPS;
>  
> - ret = region_type_cap(vdev, ,
> -   vdev->region[i].type,
> -   vdev->region[i].subtype);
> + cap_type.type = vdev->region[i].type;
> + cap_type.subtype = vdev->region[i].subtype;
> 

Re: [Qemu-devel] [PATCH v8 3/6] vfio iommu: Add support for mediated devices

2016-10-11 Thread Alex Williamson
On Tue, 11 Oct 2016 01:58:34 +0530
Kirti Wankhede  wrote:

> VFIO IOMMU drivers are designed for the devices which are IOMMU capable.
> Mediated device only uses IOMMU APIs, the underlying hardware can be
> managed by an IOMMU domain.
> 
> Aim of this change is:
> - To use most of the code of TYPE1 IOMMU driver for mediated devices
> - To support direct assigned device and mediated device in single module
> 
> Added two new callback functions to struct vfio_iommu_driver_ops. Backend
> IOMMU module that supports pinning and unpinning pages for mdev devices
> should provide these functions.
> Added APIs for pinning and unpinning pages to VFIO module. These call back
> into backend iommu module to actually pin and unpin pages.
> 
> This change adds pin and unpin support for mediated device to TYPE1 IOMMU
> backend module. More details:
> - When iommu_group of mediated devices is attached, task structure is
>   cached which is used later to pin pages and page accounting.
> - It keeps track of pinned pages for mediated domain. This data is used to
>   verify unpinning request and to unpin remaining pages while detaching, if
>   there are any.
> - Used existing mechanism for page accounting. If iommu capable domain
>   exist in the container then all pages are already pinned and accounted.
>   Accounting for mdev device is only done if there is no iommu capable
>   domain in the container.
> - Page accounting is updated on hot plug and unplug mdev device and pass
>   through device.
> 
> Tested by assigning below combinations of devices to a single VM:
> - GPU pass through only
> - vGPU device only
> - One GPU pass through and one vGPU device
> - Linux VM hot plug and unplug vGPU device while GPU pass through device
>   exist
> - Linux VM hot plug and unplug GPU pass through device while vGPU device
>   exist
> 
> Signed-off-by: Kirti Wankhede 
> Signed-off-by: Neo Jia 
> Change-Id: I295d6f0f2e0579b8d9882bfd8fd5a4194b97bd9a
> ---
>  drivers/vfio/vfio.c | 117 +++
>  drivers/vfio/vfio_iommu_type1.c | 685 
> ++--
>  include/linux/vfio.h|  13 +-
>  3 files changed, 724 insertions(+), 91 deletions(-)
> 
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 6fd6fa5469de..e3e342861e04 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1782,6 +1782,123 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, 
> size_t offset)
>  }
>  EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
>  
> +static struct vfio_group *vfio_group_from_dev(struct device *dev)
> +{
> + struct vfio_device *device;
> + struct vfio_group *group;
> + int ret;
> +
> + device = vfio_device_get_from_dev(dev);

Note how this does dev->iommu_group->vfio_group->vfio_device and then
we back out one level to get the vfio_group, it's not a terribly
lightweight path.  Perhaps we should have:

struct vfio_device *vfio_group_get_from_dev(struct device *dev)
{
struct iommu_group *iommu_group;
struct vfio_group *group;

iommu_group = iommu_group_get(dev);
if (!iommu_group)
return NULL;

group = vfio_group_get_from_iommu(iommu_group);
iommu_group_put(iommu_group);

return group;
}

vfio_device_get_from_dev() would make use of this.

Then create a separate:

static int vfio_group_add_container_user(struct vfio_group *group)
{

> + if (!atomic_inc_not_zero(&group->container_users)) {
return -EINVAL;
> + }
> +
> + if (group->noiommu) {
> + atomic_dec(&group->container_users);
return -EPERM;
> + }
> +
> + if (!group->container->iommu_driver ||
> + !vfio_group_viable(group)) {
> + atomic_dec(&group->container_users);
return -EINVAL;
> + }
> +
return 0;
}

vfio_group_get_external_user() would be updated to use this.  In fact,
creating these two functions and updating the existing code to use
these should be a separate patch.

Note that your version did not hold a group reference while doing the
pin/unpin operations below, which seems like a bug.

> +
> +err_ret:
> + vfio_device_put(device);
> + return ERR_PTR(ret);
> +}
> +
> +/*
> + * Pin a set of guest PFNs and return their associated host PFNs for local
> + * domain only.
> + * @dev [in] : device
> + * @user_pfn [in]: array of user/guest PFNs
> + * @npage [in]: count of array elements
> + * @prot [in] : protection flags
> + * @phys_pfn[out] : array of host PFNs
> + */
> +long vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
> + long npage, int prot, unsigned long *phys_pfn)
> +{
> + struct vfio_container *container;
> + struct vfio_group *group;
> + struct vfio_iommu_driver *driver;
> + ssize_t ret = -EINVAL;
> +
> + if (!dev || !user_pfn || !phys_pfn)
> + return -EINVAL;
> +
> + group = vfio_group_from_dev(dev);
> + if 

Re: [Qemu-devel] [PATCH 00/29] target-sparc: add Niagara OpenSPARC T1 sun4v emulation

2016-10-11 Thread Mark Cave-Ayland
On 01/10/16 11:05, Artyom Tarasenko wrote:

> This patch series adds a Niagara OpenSPARC T1 sun4v machine.
> The most important new feature: it can boot Solaris 10 / sparc64.
> The machine uses a firmware released by Sun as a part of the OpenSPARC 
> project.
> 
> The series are available under:
> https://github.com/artyom-tarasenko/qemu/tree/sun4v-for-upstream
> 
> The command line for booting Solaris 10 / sparc:
> 
> sparc64-softmmu/qemu-system-sparc64 -M Niagara -L /path/to/S10image/ 
> -nographic -m 256 -drive 
> if=pflash,readonly=on,file=/path/to/S10image/disk.s10hw2
> 
> More info under
> http://tyom.blogspot.de/2016/10/qemu-sun4vniagara-target-went-public.html
> 
> Artyom Tarasenko (29):
>   target-sparc: don't trap on MMU-fault if MMU is disabled
>   target-sparc: use explicit mmu register pointers
>   target-sparc: add UA2005 TTE bit #defines
>   target-sparc: add UltraSPARC T1 TLB #defines
>   target-sparc: on UA2005 don't deliver Interrupt_level_n IRQs in
> hypervisor mode
>   target-sparc: simplify replace_tlb_entry by using TTE_PGSIZE
>   target-sparc: implement UA2005 scratchpad registers
>   target-sparc: implement UltraSPARC-T1 Strand status ASR
>   target-sparc: hypervisor mode takes over nucleus mode
>   target-sparc: implement UA2005 hypervisor traps
>   target-sparc: implement UA2005 GL register
>   target-sparc: implement UA2005 rdhpstate and wrhpstate instructions
>   target-sparc: fix immediate UA2005 traps
>   target-sparc: use direct address translation in hyperprivileged mode
>   target-sparc: allow priveleged ASIs in hyperprivileged mode
>   target-sparc: ignore writes to UA2005 CPU mondo queue register
>   target-sparc: replace the last tlb entry when no free entries left
>   target-sparc: use SparcV9MMU type for sparc64 I/D-MMUs
>   target-sparc: implement UA2005 TSB Pointers
>   target-sparc: simplify ultrasparc_tsb_pointer
>   target-sparc: allow 256M sized pages
>   target-sparc: implement auto-demapping for UA2005 CPUs
>   target-sparc: implement ST_BLKINIT_ ASIs
>   target-sparc: add more registers to dump_mmu
>   target-sparc: implement UA2005 ASI_MMU (0x21)
>   target-sparc: store the UA2005 entries in sun4u format
>   target-sparc: implement sun4v RTC
>   target-sparc: move common cpu initialisation routines to sparc64.c
>   target-sparc: fix up Niagara machine
> 
>  MAINTAINERS |   6 +
>  default-configs/sparc64-softmmu.mak |   2 +
>  hw/sparc64/Makefile.objs|   2 +
>  hw/sparc64/niagara.c| 177 
>  hw/sparc64/sparc64.c| 378 +++
>  hw/sparc64/sun4u.c  | 382 +--
>  hw/timer/Makefile.objs  |   2 +
>  hw/timer/sun4v-rtc.c| 103 ++
>  include/hw/sparc/sparc64.h  |   5 +
>  include/hw/timer/sun4v-rtc.h|   1 +
>  target-sparc/asi.h  |   1 +
>  target-sparc/cpu.c  |  13 +-
>  target-sparc/cpu.h  |  79 +---
>  target-sparc/helper.h   |   1 +
>  target-sparc/int64_helper.c |  42 +++-
>  target-sparc/ldst_helper.c  | 388 
> 
>  target-sparc/machine.c  |   4 +-
>  target-sparc/mmu_helper.c   |  28 +--
>  target-sparc/translate.c|  42 +++-
>  target-sparc/win_helper.c   |  46 -
>  20 files changed, 1171 insertions(+), 531 deletions(-)
>  create mode 100644 hw/sparc64/niagara.c
>  create mode 100644 hw/sparc64/sparc64.c
>  create mode 100644 hw/timer/sun4v-rtc.c
>  create mode 100644 include/hw/sparc/sparc64.h
>  create mode 100644 include/hw/timer/sun4v-rtc.h

In general I think this is a great patchset, although obviously Richard
has further comments on inner workings - excellent job! My only minor
nit is whether -M Niagara should be lower-case to match the rest of the
machine names.

One improvement I'd like to see is a patch to update the documentation
to reflect the new sun4v machine, in particular covering where to get
hold of the PROM/disk images as per the final commit message. And also
does it make sense for us to try and distribute parts of the ROM images
with QEMU, and if so which parts and can we legally do that?

Finally my current understanding is that the atomics/target-sparc
patches will go in first and then this patchset will be rebased upon
those - is that correct? If so, I'll wait for the rebased version before
I give it a thorough test over my sun4u images.


ATB,

Mark.




Re: [Qemu-devel] [PATCH 00/16] target-sparc improvements

2016-10-11 Thread Mark Cave-Ayland
On 10/10/16 16:16, Richard Henderson wrote:

> The two main goals in this patch set are:
> 
>  * Make use of the new MO_ALIGN_* flags, to allow less use of
>check_align, and support partially misaligned fp memory ops.
> 
>  * More cleanups for ASIs, in the end using the new atomic ops.
> 
> The final two patches require the "cmpxchg atomic" v5 patch set
> which I posted yesterday.  Otherwise this patch set should apply
> to master.  The full tree is at
> 
>   git://github.com/rth7680/qemu.git tgt-sparc-6
> 
> There is overlap with Artyom's sun4v patch set.
> 
>   * MMU_PHYS_IDX cleans up patch 14 (use direct address translation
>     in hyperprivileged mode).  And if I read patch 9 correctly, may
> allow MMU_HYPV_IDX to be redundant with MMU_PHYS_IDX.  Which would
> be nice, because 7 or more mmu idxes causes the sizes of each of
> the tlb's to be reduced.
> 
>   * The patches that touch the asi's will probably conflict.
> 
> 
> r~
> 
> 
> Richard Henderson (16):
>   target-sparc: Use overalignment flags for twinx and block asis
>   target-sparc: Introduce cpu_raise_exception_ra
>   target-sparc: Add MMU_PHYS_IDX
>   target-sparc: Use MMU_PHYS_IDX for bypass asis
>   target-sparc: Handle more twinx asis
>   target-sparc: Implement swap_asi inline
>   target-sparc: Implement ldstub_asi inline
>   target-sparc: Implement cas_asi/casx_asi inline
>   target-sparc: Implement BCOPY/BFILL inline
>   target-sparc: Remove asi helper code handled inline
>   target-sparc: Implement ldqf and stqf inline
>   target-sparc: Allow 4-byte alignment on fp mem ops
>   target-sparc: Remove MMU_MODE*_SUFFIX
>   target-sparc: Optmize writeback of cpu_cond
>   target-sparc: Use tcg_gen_atomic_xchg_tl
>   target-sparc: Use tcg_gen_atomic_cmpxchg_tl
> 
>  target-sparc/cpu.h |  34 +-
>  target-sparc/helper.c  |  52 ++-
>  target-sparc/helper.h  |   7 -
>  target-sparc/ldst_helper.c | 998 
> -
>  target-sparc/mmu_helper.c  |  47 ++-
>  target-sparc/translate.c   | 446 
>  target-sparc/win_helper.c  |  37 +-
>  7 files changed, 522 insertions(+), 1099 deletions(-)

I'm fairly sure that I've tested an earlier version of this patchset,
however just to confirm is it just that you want a Tested-by from me of
this branch based upon the v6 atomics patch? If so I can run it against
all of my SPARC/SPARC64 test images over the next day or so.


ATB,

Mark.




Re: [Qemu-devel] [ipxe-devel] [PATCH 0/6] ipxe: update roms

2016-10-11 Thread Peter Pickford
Hi,

This caused a regression for me on i7 920.
https://bugs.archlinux.org/task/50778

I bisected qemu and came to the patch that introduced the above.
Then, after some more help on irc (fixing up parserom.pl and compiling with
NO_WERROR=1), I bisected ipxe and came to:
71560d185475117b10994d839afe059577e7768c is the first bad commit
Reverting this commit in ipxe master fixes my problem but probably breaks
something else.
bisect log
https://gist.github.com/1c612fd623cc17f97d6e8a61a3a2

No idea why this change should affect Bloomfield processors.


Thanks

Peter

On 24 June 2016 at 05:39, Gerd Hoffmann  wrote:

>   Hi,
>
> Here comes ipxe update for qemu 2.7.  It rebases the ipxe submodule
> to latest upstream master.  We pick up support for virtio 1.0.
> Also a fix for a EFI bug which causes problems with recent edk2
> versions.
>
> There are some more hiccups due to network boot changes in edk2
> which are not root-caused yet.  They are present with both old
> and new ipxe versions, so we have at least no regressions here.
> But possibly we will need another (much smaller) bugfix ipxe
> update during freeze.
>
> This patch series also adds a boot rom for the new e1000e emulation
> and, while being at it, for vmxnet3 too.
>
> Patch 6/6 with the binary update has *not* been sent to the list
> as it is pretty big.  If you want the prebuild roms for testing
> please fetch them from the git repository at:
>   git://git.kraxel.org/qemu work/ipxe
>
> cheers,
>   Gerd
>
> Gerd Hoffmann (6):
>   ipxe: update submodule from 4e03af8ec to 041863191
>   ipxe: add e1000e rom
>   ipxe: add vmxnet3 rom
>   e1000e: add boot rom
>   vmxnet3: add boot rom
>   ipxe: update prebuilt binaries
>
>  hw/net/e1000e.c  |   1 +
>  hw/net/vmxnet3.c |   1 +
>  include/hw/i386/pc.h |   4 
>  pc-bios/efi-e1000.rom| Bin 196608 -> 209408 bytes
>  pc-bios/efi-e1000e.rom   | Bin 0 -> 209408 bytes
>  pc-bios/efi-eepro100.rom | Bin 197120 -> 209920 bytes
>  pc-bios/efi-ne2k_pci.rom | Bin 195584 -> 208384 bytes
>  pc-bios/efi-pcnet.rom| Bin 195584 -> 208384 bytes
>  pc-bios/efi-rtl8139.rom  | Bin 199168 -> 211456 bytes
>  pc-bios/efi-virtio.rom   | Bin 193024 -> 211456 bytes
>  pc-bios/efi-vmxnet3.rom  | Bin 0 -> 205312 bytes
>  roms/Makefile|   8 ++--
>  roms/ipxe|   2 +-
>  13 files changed, 13 insertions(+), 3 deletions(-)
>  create mode 100644 pc-bios/efi-e1000e.rom
>  create mode 100644 pc-bios/efi-vmxnet3.rom
>
> --
> 1.8.3.1
>
> ___
> ipxe-devel mailing list
> ipxe-de...@lists.ipxe.org
> https://lists.ipxe.org/mailman/listinfo.cgi/ipxe-devel
>


Re: [Qemu-devel] [PATCH 0/3] Improvements for the boot-sector tester

2016-10-11 Thread Michael S. Tsirkin
On Tue, Oct 11, 2016 at 05:19:34PM +0200, Thomas Huth wrote:
> Here are two patches that try to improve the situation with the
> slow pxe-test on ppc64 a little bit, and one patch that fixes
> a potential race condition between tests that run in parallel
> by using a random filename instead of an invariant one.


Reviewed-by: Michael S. Tsirkin 

You can go ahead and merge as I'm offline tomorrow.

> Thomas Huth (3):
>   tests/boot-sector: Use minimum length for the Forth boot script
>   tests/boot-sector: Use mkstemp() to create a unique file name
>   tests/boot-sector: Increase time-out to 90 seconds
> 
>  tests/bios-tables-test.c |  2 +-
>  tests/boot-sector.c  | 25 -
>  tests/boot-sector.h  |  4 ++--
>  tests/pxe-test.c |  2 +-
>  4 files changed, 20 insertions(+), 13 deletions(-)
> 
> -- 
> 1.8.3.1



Re: [Qemu-devel] [PATCH 0/6] qdev class properties + abstract class support on device-list-properties

2016-10-11 Thread no-reply
Hi,

Your series failed automatic build test. Please find the testing commands and
their output below. If you have docker installed, you can probably reproduce it
locally.

Message-id: 1476218479-3592-1-git-send-email-ehabk...@redhat.com
Subject: [Qemu-devel] [PATCH 0/6] qdev class properties + abstract class 
support on device-list-properties
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash
set -e
git submodule update --init dtc
# Let docker tests dump environment info
export SHOW_ENV=1
make J=8 docker-test-quick@centos6
make J=8 docker-test-mingw@fedora
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag] 
patchew/1476218479-3592-1-git-send-email-ehabk...@redhat.com -> 
patchew/1476218479-3592-1-git-send-email-ehabk...@redhat.com
Switched to a new branch 'test'
9cf653a qdev: Warning about using object_class_property_add() in new code
a682bb9 qmp: Support abstract classes on device-list-properties
f36732c qom: object_class_property_iter_init() function
dd1cae3 qdev: Register static properties as class properties
cbe9b5b qdev: Extract property-default code to qdev_property_set_to_default()
1cdccf7 qdev: qdev_class_set_props() function

=== OUTPUT BEGIN ===
Submodule 'dtc' (git://git.qemu-project.org/dtc.git) registered for path 'dtc'
Cloning into 'dtc'...
Submodule path 'dtc': checked out '65cc4d2748a2c2e6f27f1cf39e07a5dbabd80ebf'
  BUILD   centos6
  ARCHIVE qemu.tgz
  ARCHIVE dtc.tgz
  COPYRUNNER
  RUN test-quick in centos6
Packages installed:
SDL-devel-1.2.14-7.el6_7.1.x86_64
ccache-3.1.6-2.el6.x86_64
epel-release-6-8.noarch
gcc-4.4.7-17.el6.x86_64
git-1.7.1-4.el6_7.1.x86_64
glib2-devel-2.28.8-5.el6.x86_64
libfdt-devel-1.4.0-1.el6.x86_64
make-3.81-23.el6.x86_64
package g++ is not installed
pixman-devel-0.32.8-1.el6.x86_64
tar-1.23-15.el6_8.x86_64
zlib-devel-1.2.3-29.el6.x86_64

Environment variables:
PACKAGES=libfdt-devel ccache tar git make gcc g++ zlib-devel 
glib2-devel SDL-devel pixman-devel epel-release
HOSTNAME=6328b0ad76ac
TERM=xterm
MAKEFLAGS= -j8
HISTSIZE=1000
J=8
USER=root
CCACHE_DIR=/var/tmp/ccache
EXTRA_CONFIGURE_OPTS=
V=
SHOW_ENV=1
MAIL=/var/spool/mail/root
PATH=/usr/lib/ccache:/usr/lib64/ccache:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
PWD=/
LANG=en_US.UTF-8
TARGET_LIST=
HISTCONTROL=ignoredups
SHLVL=1
HOME=/root
TEST_DIR=/tmp/qemu-test
LOGNAME=root
LESSOPEN=||/usr/bin/lesspipe.sh %s
FEATURES= dtc
DEBUG=
G_BROKEN_FILENAMES=1
CCACHE_HASHDIR=
_=/usr/bin/env

Configure options:
--enable-werror --target-list=x86_64-softmmu,aarch64-softmmu 
--prefix=/var/tmp/qemu-build/install
No C++ compiler available; disabling C++ specific optional code
Install prefix/var/tmp/qemu-build/install
BIOS directory/var/tmp/qemu-build/install/share/qemu
binary directory  /var/tmp/qemu-build/install/bin
library directory /var/tmp/qemu-build/install/lib
module directory  /var/tmp/qemu-build/install/lib/qemu
libexec directory /var/tmp/qemu-build/install/libexec
include directory /var/tmp/qemu-build/install/include
config directory  /var/tmp/qemu-build/install/etc
local state directory   /var/tmp/qemu-build/install/var
Manual directory  /var/tmp/qemu-build/install/share/man
ELF interp prefix /usr/gnemul/qemu-%M
Source path   /tmp/qemu-test/src
C compilercc
Host C compiler   cc
C++ compiler  
Objective-C compiler cc
ARFLAGS   rv
CFLAGS-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -g 
QEMU_CFLAGS   -I/usr/include/pixman-1-pthread -I/usr/include/glib-2.0 
-I/usr/lib64/glib-2.0/include   -fPIE -DPIE -m64 -D_GNU_SOURCE 
-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes 
-Wredundant-decls -Wall -Wundef -Wwrite-strings -Wmissing-prototypes 
-fno-strict-aliasing -fno-common -fwrapv  -Wendif-labels -Wmissing-include-dirs 
-Wempty-body -Wnested-externs -Wformat-security -Wformat-y2k -Winit-self 
-Wignored-qualifiers -Wold-style-declaration -Wold-style-definition 
-Wtype-limits -fstack-protector-all
LDFLAGS   -Wl,--warn-common -Wl,-z,relro -Wl,-z,now -pie -m64 -g 
make  make
install   install
pythonpython -B
smbd  /usr/sbin/smbd
module supportno
host CPU  x86_64
host big endian   no
target list   x86_64-softmmu aarch64-softmmu
tcg debug enabled no
gprof enabled no
sparse enabledno
strip binariesyes
profiler  no
static build  no
pixmansystem
SDL support   yes (1.2.14)
GTK support   no 
GTK GL supportno
VTE support   no 
TLS priority  NORMAL
GNUTLS supportno
GNUTLS rndno
libgcrypt no
libgcrypt kdf no
nettleno 
nettle kdfno
libtasn1  no
curses supportno
virgl support no
curl support  no
mingw32 support   no
Audio drivers oss
Block whitelist (rw) 
Block whitelist (ro) 
VirtFS supportno
VNC support   yes
VNC SASL support  no
VNC JPEG support  no
VNC 

Re: [Qemu-devel] [PATCH v8 4/6] docs: Add Documentation for Mediated devices

2016-10-11 Thread Kirti Wankhede


On 10/11/2016 7:44 PM, Daniel P. Berrange wrote:
> On Tue, Oct 11, 2016 at 01:58:35AM +0530, Kirti Wankhede wrote:
>> Add file Documentation/vfio-mediated-device.txt that include details of
>> mediated device framework.
>>
>> Signed-off-by: Kirti Wankhede 
>> Signed-off-by: Neo Jia 
>> Change-Id: I137dd646442936090d92008b115908b7b2c7bc5d
>> ---
>>  Documentation/vfio-mdev/vfio-mediated-device.txt | 219 
>> +++
>>  1 file changed, 219 insertions(+)
>>  create mode 100644 Documentation/vfio-mdev/vfio-mediated-device.txt
>>
>> diff --git a/Documentation/vfio-mdev/vfio-mediated-device.txt 
>> b/Documentation/vfio-mdev/vfio-mediated-device.txt
>> new file mode 100644
>> index ..c1eacb83807b
>> --- /dev/null
>> +++ b/Documentation/vfio-mdev/vfio-mediated-device.txt
>> @@ -0,0 +1,219 @@
>> +/*
>> + * VFIO Mediated devices
>> + *
>> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> 
> Adding "All rights reserved" is bogus since you're providing it under
> the GPL, but I see countless kernel source files have this, so meh.
> 
>> + * Author: Neo Jia 
>> + * Kirti Wankhede 
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
> 
>> +Mediated device management interface via sysfs
>> +--
>> +Management interface via sysfs allows user space software, like libvirt, to
>> +query and configure mediated device in a HW agnostic fashion. This 
>> management
>> +interface provide flexibility to underlying physical device's driver to 
>> support
>> +mediated device hotplug, multiple mediated devices per virtual machine, 
>> multiple
>> +mediated devices from different physical devices, etc.
>> +
>> +Under per-physical device sysfs:
>> +
>> +
>> +* mdev_supported_types:
>> +List of current supported mediated device types and its details are 
>> added
>> +in this directory in following format:
>> +
>> +|- 
>> +|--- Vendor-specific-attributes [optional]
>> +|--- mdev_supported_types
>> +| |--- 
>> +| |   |--- create
>> +| |   |--- name
>> +| |   |--- available_instances
>> +| |   |--- description /class
>> +| |   |--- [devices]
>> +| |--- 
>> +| |   |--- create
>> +| |   |--- name
>> +| |   |--- available_instances
>> +| |   |--- description /class
>> +| |   |--- [devices]
>> +| |--- 
>> +|  |--- create
>> +|  |--- name
>> +|  |--- available_instances
>> +|  |--- description /class
>> +|  |--- [devices]
>> +
>> +[TBD : description or class is yet to be decided. This will change.]
> 
> I thought that in previous discussions we had agreed to drop
> the  concept and use the name as the unique identifier.
> When reporting these types in libvirt we won't want to report
> the type id values - we'll want the name strings to be unique.
> 

The 'name' might not be unique but type_id will be. For example, as Neo
pointed out in an earlier discussion, virtual devices can come from two
different physical devices; the end user would be presented with what they
had selected, but there will be internal implementation differences. In
that case 'type_id' will be unique.

> Based on this sysfs spec, the only fields we would report in
> libvirt would be name + available_instances.
> 
>> +Under per mdev device:
>> +--
>> +
>> +|- 
>> +|--- $MDEV_UUID
>> + |--- remove
>> + |--- {link to its type}
>> + |--- vendor-specific-attributes [optional]
> 
> Again, I thought we'd agreed to not have arbitrary vendor
> specific attributes ?
> 
> That said, I don't mind them existing in kernel sysfs, just
> be aware that we'll *not* expose any vendor specific attributes
> in libvirt, so your functional implementation must not rely on
> these attributes being used in any way by libvirt.
> 
> 

Right, libvirt would not use vendor specific attributes, but an admin can
use these to get/set extra information for a particular device. These
are optional, so it's up to the vendor to provide such attributes or not.

Thanks,
Kirti

> 
>> +
>> +* remove: (write only)
>> +Write '1' to 'remove' file would destroy mdev device. Vendor driver can
>> +fail remove() callback if that device is active and vendor driver
>> +doesn't support hot-unplug.
>> +Example:
>> +# echo 1 > /sys/bus/mdev/devices/$mdev_UUID/remove
> 
>> +Mediated device Hotplug:
>> +
>> +
>> +Mediated devices can be created and assigned during runtime. Procedure to
>> +hot-plug mediated device is same as hot-plug PCI device.
> 
> Generally this looks much saner now all the grouping stuff has gone.
> 
> 
> 
> Regards,
> Daniel
> 



[Qemu-devel] [PATCH 6/6] qdev: Warning about using object_class_property_add() in new code

2016-10-11 Thread Eduardo Habkost
The only remaining user of qdev_property_add_static() is
arm_cpu_post_init(), but removing it may take some work. While we
don't change it, warn people to not use the function in new code.

Cc: Peter Maydell 
Cc: qemu-...@nongnu.org
Signed-off-by: Eduardo Habkost 
---
 hw/core/qdev.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 831414d..626f844 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -813,6 +813,10 @@ static void qdev_property_set_to_default(DeviceState *dev, 
Property *prop,
  * Add a static QOM property to @dev for qdev property @prop.
  * On error, store error in @errp.  Static properties access data in a struct.
  * The type of the QOM property is derived from prop->info.
+ *
+ * Do not use this in new code. Either use qdev_class_set_props(),
+ * or register regular QOM properties using object_property_add() or
+ * object_class_property_add().
  */
 void qdev_property_add_static(DeviceState *dev, Property *prop,
   Error **errp)
-- 
2.7.4




[Qemu-devel] [PATCH 5/6] qmp: Support abstract classes on device-list-properties

2016-10-11 Thread Eduardo Habkost
When an abstract class is used on device-list-properties, we can
simply return the class properties registered for the class.

This will be useful if management software needs to query for CPU
options that are supported by all CPU models, for example.

Signed-off-by: Eduardo Habkost 
---
 qmp.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/qmp.c b/qmp.c
index b3ba9ef..dd6090d 100644
--- a/qmp.c
+++ b/qmp.c
@@ -518,7 +518,7 @@ DevicePropertyInfoList *qmp_device_list_properties(const 
char *typename,
Error **errp)
 {
 ObjectClass *klass;
-Object *obj;
+Object *obj = NULL;
 ObjectProperty *prop;
 ObjectPropertyIterator iter;
 DevicePropertyInfoList *prop_list = NULL;
@@ -537,19 +537,16 @@ DevicePropertyInfoList *qmp_device_list_properties(const 
char *typename,
 }
 
 if (object_class_is_abstract(klass)) {
-error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "name",
-   "non-abstract device type");
-return NULL;
-}
-
-if (DEVICE_CLASS(klass)->cannot_destroy_with_object_finalize_yet) {
-error_setg(errp, "Can't list properties of device '%s'", typename);
-return NULL;
+object_class_property_iter_init(&iter, klass);
+} else {
+if (DEVICE_CLASS(klass)->cannot_destroy_with_object_finalize_yet) {
+error_setg(errp, "Can't list properties of device '%s'", typename);
+return NULL;
+}
+obj = object_new(typename);
+object_property_iter_init(&iter, obj);
 }
 
-obj = object_new(typename);
-
-object_property_iter_init(&iter, obj);
 while ((prop = object_property_iter_next(&iter))) {
 DevicePropertyInfo *info;
 DevicePropertyInfoList *entry;
-- 
2.7.4




[Qemu-devel] [PATCH 3/6] qdev: Register static properties as class properties

2016-10-11 Thread Eduardo Habkost
Instead of registering qdev static properties on instance_init,
register them as class properties, at qdev_class_set_props().

qdev_property_add_legacy() was replaced by an equivalent
qdev_class_property_add_legacy() function.
qdev_property_add_static(), on the other hand, can't be
eliminated yet because it is used by arm_cpu_post_init().

Signed-off-by: Eduardo Habkost 
---
 hw/core/qdev.c | 69 --
 1 file changed, 58 insertions(+), 11 deletions(-)

diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 178cfa1..831414d 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -739,12 +739,12 @@ static void qdev_get_legacy_property(Object *obj, Visitor 
*v,
 }
 
 /**
- * qdev_property_add_legacy:
- * @dev: Device to add the property to.
+ * qdev_class_property_add_legacy:
+ * @oc: Device to add the property to.
  * @prop: The qdev property definition.
  * @errp: location to store error information.
  *
- * Add a legacy QOM property to @dev for qdev property @prop.
+ * Add a legacy QOM property to @oc for qdev property @prop.
  * On error, store error in @errp.
  *
  * Legacy properties are string versions of QOM properties.  The format of
@@ -754,7 +754,7 @@ static void qdev_get_legacy_property(Object *obj, Visitor 
*v,
  * Do not use this is new code!  QOM Properties added through this interface
  * will be given names in the "legacy" namespace.
  */
-static void qdev_property_add_legacy(DeviceState *dev, Property *prop,
+static void qdev_class_property_add_legacy(ObjectClass *oc, Property *prop,
  Error **errp)
 {
 gchar *name;
@@ -765,11 +765,13 @@ static void qdev_property_add_legacy(DeviceState *dev, 
Property *prop,
 }
 
 name = g_strdup_printf("legacy-%s", prop->name);
-object_property_add(OBJECT(dev), name, "str",
-prop->info->print ? qdev_get_legacy_property : 
prop->info->get,
-NULL,
-NULL,
-prop, errp);
+object_class_property_add(oc, name, "str",
+   prop->info->print ?
+   qdev_get_legacy_property :
+   prop->info->get,
+   NULL,
+   NULL,
+   prop, errp);
 
 g_free(name);
 }
@@ -844,6 +846,45 @@ void qdev_property_add_static(DeviceState *dev, Property 
*prop,
 qdev_property_set_to_default(dev, prop, &error_abort);
 }
 
+/**
+ * qdev_class_property_add_static:
+ * @oc: Class to add the property to.
+ * @prop: The qdev property definition.
+ * @errp: location to store error information.
+ *
+ * Add a static QOM property to @oc for qdev property @prop.
+ * On error, store error in @errp.  Static properties access data in a struct.
+ * The type of the QOM property is derived from prop->info.
+ */
+static void qdev_class_property_add_static(ObjectClass *oc, Property *prop,
+   Error **errp)
+{
+Error *local_err = NULL;
+
+/*
+ * TODO qdev_prop_ptr does not have getters or setters.  It must
+ * go now that it can be replaced with links.  The test should be
+ * removed along with it: all static properties are read/write.
+ */
+if (!prop->info->get && !prop->info->set) {
+return;
+}
+
+object_class_property_add(oc, prop->name, prop->info->name,
+  prop->info->get, prop->info->set,
+  prop->info->release,
+  prop, &local_err);
+
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+
+object_class_property_set_description(oc, prop->name,
+  prop->info->description,
+  &error_abort);
+}
+
 /* @qdev_alias_all_properties - Add alias properties to the source object for
  * all qdev properties on the target DeviceState.
  */
@@ -867,8 +908,15 @@ void qdev_alias_all_properties(DeviceState *target, Object 
*source)
 
 void qdev_class_set_props(DeviceClass *dc, Property *props)
 {
+Property *prop;
+ObjectClass *oc = OBJECT_CLASS(dc);
+
 assert(!dc->props);
 dc->props = props;
+for (prop = dc->props; prop && prop->name; prop++) {
+qdev_class_property_add_legacy(oc, prop, &error_abort);
+qdev_class_property_add_static(oc, prop, &error_abort);
+}
 }
 
 static int qdev_add_hotpluggable_device(Object *obj, void *opaque)
@@ -1068,8 +1116,7 @@ static void device_initfn(Object *obj)
 class = object_get_class(OBJECT(dev));
 do {
 for (prop = DEVICE_CLASS(class)->props; prop && prop->name; prop++) {
-qdev_property_add_legacy(dev, prop, &error_abort);
-qdev_property_add_static(dev, prop, &error_abort);
+qdev_property_set_to_default(dev, prop, &error_abort);
 }
 class = 

[Qemu-devel] [PATCH 4/6] qom: object_class_property_iter_init() function

2016-10-11 Thread Eduardo Habkost
The new function will allow us to iterate over class properties
using the same logic we use for object properties. Unit test
included.

Signed-off-by: Eduardo Habkost 
---
 include/qom/object.h   | 14 ++
 qom/object.c   | 11 +--
 tests/check-qom-proplist.c | 28 
 3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/include/qom/object.h b/include/qom/object.h
index 5ecc2d1..6e3646e 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -995,6 +995,20 @@ void object_property_iter_init(ObjectPropertyIterator 
*iter,
Object *obj);
 
 /**
+ * object_class_property_iter_init:
+ * @klass: the class
+ *
+ * Initializes an iterator for traversing all properties
+ * registered against an object class and all parent classes.
+ *
+ * It is forbidden to modify the property list while iterating,
+ * whether removing or adding properties.
+ */
+void object_class_property_iter_init(ObjectPropertyIterator *iter,
+ ObjectClass *klass);
+
+
+/**
  * object_property_iter_next:
  * @iter: the iterator instance
  *
diff --git a/qom/object.c b/qom/object.c
index 7a05e35..3ae9cc7 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -1010,6 +1010,14 @@ void object_property_iter_init(ObjectPropertyIterator 
*iter,
 iter->nextclass = object_get_class(obj);
 }
 
+
+void object_class_property_iter_init(ObjectPropertyIterator *iter,
+ ObjectClass *klass)
+{
+g_hash_table_iter_init(&iter->iter, klass->properties);
+iter->nextclass = object_class_get_parent(klass);
+}
+
 ObjectProperty *object_property_iter_next(ObjectPropertyIterator *iter)
 {
 gpointer key, val;
@@ -1017,8 +1025,7 @@ ObjectProperty 
*object_property_iter_next(ObjectPropertyIterator *iter)
 if (!iter->nextclass) {
 return NULL;
 }
-g_hash_table_iter_init(&iter->iter, iter->nextclass->properties);
-iter->nextclass = object_class_get_parent(iter->nextclass);
+object_class_property_iter_init(iter, iter->nextclass);
 }
 return val;
 }
diff --git a/tests/check-qom-proplist.c b/tests/check-qom-proplist.c
index a92acc9..769549c 100644
--- a/tests/check-qom-proplist.c
+++ b/tests/check-qom-proplist.c
@@ -496,6 +496,33 @@ static void test_dummy_iterator(void)
 }
 
 
+static void test_dummy_class_iterator(void)
+{
+ObjectClass *klass = object_class_by_name(TYPE_DUMMY);
+ObjectProperty *prop;
+ObjectPropertyIterator iter;
+bool seensv = false, seenav = false, seentype;
+
+object_class_property_iter_init(&iter, klass);
+while ((prop = object_property_iter_next(&iter))) {
+if (g_str_equal(prop->name, "sv")) {
+seensv = true;
+} else if (g_str_equal(prop->name, "av")) {
+seenav = true;
+} else if (g_str_equal(prop->name, "type")) {
+/* This prop comes from the base Object class */
+seentype = true;
+} else {
+g_printerr("Found prop '%s'\n", prop->name);
+g_assert_not_reached();
+}
+}
+g_assert(seenav);
+g_assert(seensv);
+g_assert(seentype);
+}
+
+
 static void test_dummy_delchild(void)
 {
 Object *parent = object_get_objects_root();
@@ -524,6 +551,7 @@ int main(int argc, char **argv)
 g_test_add_func("/qom/proplist/badenum", test_dummy_badenum);
 g_test_add_func("/qom/proplist/getenum", test_dummy_getenum);
 g_test_add_func("/qom/proplist/iterator", test_dummy_iterator);
+g_test_add_func("/qom/proplist/class_iterator", test_dummy_class_iterator);
 g_test_add_func("/qom/proplist/delchild", test_dummy_delchild);
 
 return g_test_run();
-- 
2.7.4




[Qemu-devel] [PATCH 0/6] qdev class properties + abstract class support on device-list-properties

2016-10-11 Thread Eduardo Habkost
This series allows abstract classes to be used on
device-list-properties, which will return all class properties
registered for the class.

Patches 1-3 change qdev to register all static properties as
class properties instead of instance properties.

Patches 4-5 change device-list-properties so it can return the
list of properties for abstract classes.

Patch 6 just adds a warning telling people not to use
qdev_property_add_static() in new code.

The series is based on the "tests: A few check-qom-proplist
fixes" series I have submitted earlier. A git branch containing
this series can be found at:
  https://github.com/ehabkost/qemu-hacks.git 
work/device-list-abstract-properties

Eduardo Habkost (6):
  qdev: qdev_class_set_props() function
  qdev: Extract property-default code to qdev_property_set_to_default()
  qdev: Register static properties as class properties
  qom: object_class_property_iter_init() function
  qmp: Support abstract classes on device-list-properties
  qdev: Warning about using object_class_property_add() in new code

 hw/9pfs/virtio-9p-device.c  |   2 +-
 hw/acpi/piix4.c |   2 +-
 hw/arm/armv7m.c |   2 +-
 hw/arm/bcm2836.c|   2 +-
 hw/arm/integratorcp.c   |   2 +-
 hw/arm/musicpal.c   |   2 +-
 hw/arm/pxa2xx.c |   4 +-
 hw/arm/pxa2xx_gpio.c|   2 +-
 hw/arm/spitz.c  |   2 +-
 hw/arm/stm32f205_soc.c  |   2 +-
 hw/arm/strongarm.c  |   2 +-
 hw/arm/xlnx-zynqmp.c|   2 +-
 hw/audio/ac97.c |   2 +-
 hw/audio/adlib.c|   2 +-
 hw/audio/cs4231.c   |   2 +-
 hw/audio/cs4231a.c  |   2 +-
 hw/audio/gus.c  |   2 +-
 hw/audio/hda-codec.c|   2 +-
 hw/audio/intel-hda.c|   4 +-
 hw/audio/marvell_88w8618.c  |   2 +-
 hw/audio/pcspk.c|   2 +-
 hw/audio/pl041.c|   2 +-
 hw/audio/sb16.c |   2 +-
 hw/block/fdc.c  |   6 +-
 hw/block/m25p80.c   |   2 +-
 hw/block/nand.c |   2 +-
 hw/block/nvme.c |   2 +-
 hw/block/onenand.c  |   2 +-
 hw/block/pflash_cfi01.c |   2 +-
 hw/block/pflash_cfi02.c |   2 +-
 hw/block/virtio-blk.c   |   2 +-
 hw/char/bcm2835_aux.c   |   2 +-
 hw/char/cadence_uart.c  |   2 +-
 hw/char/debugcon.c  |   2 +-
 hw/char/digic-uart.c|   2 +-
 hw/char/escc.c  |   2 +-
 hw/char/etraxfs_ser.c   |   2 +-
 hw/char/exynos4210_uart.c   |   2 +-
 hw/char/grlib_apbuart.c |   2 +-
 hw/char/imx_serial.c|   2 +-
 hw/char/ipoctal232.c|   2 +-
 hw/char/lm32_juart.c|   2 +-
 hw/char/lm32_uart.c |   2 +-
 hw/char/milkymist-uart.c|   2 +-
 hw/char/parallel.c  |   2 +-
 hw/char/pl011.c |   2 +-
 hw/char/sclpconsole-lm.c|   2 +-
 hw/char/sclpconsole.c   |   2 +-
 hw/char/serial-isa.c|   2 +-
 hw/char/serial-pci.c|   6 +-
 hw/char/spapr_vty.c |   2 +-
 hw/char/stm32f2xx_usart.c   |   2 +-
 hw/char/virtio-console.c|   2 +-
 hw/char/virtio-serial-bus.c |   4 +-
 hw/char/xilinx_uartlite.c   |   2 +-
 hw/core/generic-loader.c|   2 +-
 hw/core/or-irq.c|   2 +-
 hw/core/platform-bus.c  |   2 +-
 hw/core/qdev.c  | 112 ++--
 hw/cpu/a15mpcore.c  |   2 +-
 hw/cpu/a9mpcore.c   |   2 +-
 hw/cpu/arm11mpcore.c|   2 +-
 hw/cpu/realview_mpcore.c|   2 +-
 hw/display/bcm2835_fb.c |   2 +-
 hw/display/cg3.c|   2 +-
 hw/display/cirrus_vga.c |   4 +-
 hw/display/g364fb.c |   2 +-
 hw/display/milkymist-vgafb.c|   2 +-
 hw/display/qxl.c|   2 +-
 hw/display/tcx.c|   2 +-
 hw/display/vga-isa.c|   2 +-
 hw/display/vga-pci.c|   4 +-
 hw/display/virtio-gpu-pci.c |   2 +-
 hw/display/virtio-gpu.c |   2 +-
 hw/display/virtio-vga.c |   2 +-
 hw/display/vmware_vga.c |   2 +-
 hw/dma/i82374.c |   2 +-
 hw/dma/i8257.c  |   2 +-
 hw/dma/pl330.c  |   2 +-
 hw/dma/pxa2xx_dma.c |   2 +-
 hw/dma/sparc32_dma.c|   2 +-
 hw/dma/sun4m_iommu.c|   2 +-
 hw/dma/xilinx_axidma.c  |   2 +-
 hw/gpio/imx_gpio.c  |   2 +-
 hw/gpio/omap_gpio.c |   4 +-
 

[Qemu-devel] [PATCH 2/6] qdev: Extract property-default code to qdev_property_set_to_default()

2016-10-11 Thread Eduardo Habkost
The code that registers qdev properties will be split from the
code that initializes default values on instance_init, so move it
to a separate function.

Signed-off-by: Eduardo Habkost 
---
 hw/core/qdev.c | 41 +
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index a914630..178cfa1 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -775,6 +775,34 @@ static void qdev_property_add_legacy(DeviceState *dev, 
Property *prop,
 }
 
 /**
+ * qdev_property_set_to_default:
+ * @dev: Device where the property will be reset
+ * @prop: The qdev property definition
+ * @errp: location to store error information
+ *
+ * Reset the value of property @prop in @dev to its default value.
+ * On error, store error in @errp.
+ */
+static void qdev_property_set_to_default(DeviceState *dev, Property *prop,
+ Error **errp)
+{
+Object *obj = OBJECT(dev);
+
+if (prop->qtype == QTYPE_NONE) {
+return;
+}
+
+if (prop->qtype == QTYPE_QBOOL) {
+object_property_set_bool(obj, prop->defval, prop->name, errp);
+} else if (prop->info->enum_table) {
+object_property_set_str(obj, prop->info->enum_table[prop->defval],
+prop->name, errp);
+} else if (prop->qtype == QTYPE_QINT) {
+object_property_set_int(obj, prop->defval, prop->name, errp);
+}
+}
+
+/**
  * qdev_property_add_static:
  * @dev: Device to add the property to.
  * @prop: The qdev property definition.
@@ -813,18 +841,7 @@ void qdev_property_add_static(DeviceState *dev, Property 
*prop,
 prop->info->description,
 &error_abort);
 
-if (prop->qtype == QTYPE_NONE) {
-return;
-}
-
-if (prop->qtype == QTYPE_QBOOL) {
-object_property_set_bool(obj, prop->defval, prop->name, &error_abort);
-} else if (prop->info->enum_table) {
-object_property_set_str(obj, prop->info->enum_table[prop->defval],
-prop->name, &error_abort);
-} else if (prop->qtype == QTYPE_QINT) {
-object_property_set_int(obj, prop->defval, prop->name, &error_abort);
-}
+qdev_property_set_to_default(dev, prop, &error_abort);
 }
 
 /* @qdev_alias_all_properties - Add alias properties to the source object for
-- 
2.7.4




Re: [Qemu-devel] [PATCH 0/4] Allow blockdev-add for SSH

2016-10-11 Thread no-reply
Hi,

Your series failed automatic build test. Please find the testing commands and
their output below. If you have docker installed, you can probably reproduce it
locally.

Message-id: 1476171437-11830-1-git-send-email-ashijeetacha...@gmail.com
Subject: [Qemu-devel] [PATCH 0/4] Allow blockdev-add for SSH
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash
set -e
git submodule update --init dtc
# Let docker tests dump environment info
export SHOW_ENV=1
make J=8 docker-test-quick@centos6
make J=8 docker-test-mingw@fedora
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
ada148c qapi: allow blockdev-add for ssh
c15d50b block/ssh: Use InetSocketAddress options
370773b block/ssh: Add InetSocketAddress and accept it
7b6cfe0 block/ssh: Add ssh_has_filename_options_conflict()

=== OUTPUT BEGIN ===
Submodule 'dtc' (git://git.qemu-project.org/dtc.git) registered for path 'dtc'
Cloning into 'dtc'...
Submodule path 'dtc': checked out '65cc4d2748a2c2e6f27f1cf39e07a5dbabd80ebf'
  BUILD   centos6
  ARCHIVE qemu.tgz
  ARCHIVE dtc.tgz
  COPY    RUNNER
  RUN test-quick in centos6
Packages installed:
SDL-devel-1.2.14-7.el6_7.1.x86_64
ccache-3.1.6-2.el6.x86_64
epel-release-6-8.noarch
gcc-4.4.7-17.el6.x86_64
git-1.7.1-4.el6_7.1.x86_64
glib2-devel-2.28.8-5.el6.x86_64
libfdt-devel-1.4.0-1.el6.x86_64
make-3.81-23.el6.x86_64
package g++ is not installed
pixman-devel-0.32.8-1.el6.x86_64
tar-1.23-15.el6_8.x86_64
zlib-devel-1.2.3-29.el6.x86_64

Environment variables:
PACKAGES=libfdt-devel ccache tar git make gcc g++ zlib-devel 
glib2-devel SDL-devel pixman-devel epel-release
HOSTNAME=15ba99d379b9
TERM=xterm
MAKEFLAGS= -j8
HISTSIZE=1000
J=8
USER=root
CCACHE_DIR=/var/tmp/ccache
EXTRA_CONFIGURE_OPTS=
V=
SHOW_ENV=1
MAIL=/var/spool/mail/root
PATH=/usr/lib/ccache:/usr/lib64/ccache:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
PWD=/
LANG=en_US.UTF-8
TARGET_LIST=
HISTCONTROL=ignoredups
SHLVL=1
HOME=/root
TEST_DIR=/tmp/qemu-test
LOGNAME=root
LESSOPEN=||/usr/bin/lesspipe.sh %s
FEATURES= dtc
DEBUG=
G_BROKEN_FILENAMES=1
CCACHE_HASHDIR=
_=/usr/bin/env

Configure options:
--enable-werror --target-list=x86_64-softmmu,aarch64-softmmu 
--prefix=/var/tmp/qemu-build/install
No C++ compiler available; disabling C++ specific optional code
Install prefix    /var/tmp/qemu-build/install
BIOS directory    /var/tmp/qemu-build/install/share/qemu
binary directory  /var/tmp/qemu-build/install/bin
library directory /var/tmp/qemu-build/install/lib
module directory  /var/tmp/qemu-build/install/lib/qemu
libexec directory /var/tmp/qemu-build/install/libexec
include directory /var/tmp/qemu-build/install/include
config directory  /var/tmp/qemu-build/install/etc
local state directory   /var/tmp/qemu-build/install/var
Manual directory  /var/tmp/qemu-build/install/share/man
ELF interp prefix /usr/gnemul/qemu-%M
Source path   /tmp/qemu-test/src
C compiler        cc
Host C compiler   cc
C++ compiler  
Objective-C compiler cc
ARFLAGS   rv
CFLAGS            -O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -g 
QEMU_CFLAGS   -I/usr/include/pixman-1 -pthread -I/usr/include/glib-2.0 
-I/usr/lib64/glib-2.0/include   -fPIE -DPIE -m64 -D_GNU_SOURCE 
-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes 
-Wredundant-decls -Wall -Wundef -Wwrite-strings -Wmissing-prototypes 
-fno-strict-aliasing -fno-common -fwrapv  -Wendif-labels -Wmissing-include-dirs 
-Wempty-body -Wnested-externs -Wformat-security -Wformat-y2k -Winit-self 
-Wignored-qualifiers -Wold-style-declaration -Wold-style-definition 
-Wtype-limits -fstack-protector-all
LDFLAGS   -Wl,--warn-common -Wl,-z,relro -Wl,-z,now -pie -m64 -g 
make  make
install   install
python            python -B
smbd  /usr/sbin/smbd
module support    no
host CPU  x86_64
host big endian   no
target list   x86_64-softmmu aarch64-softmmu
tcg debug enabled no
gprof enabled no
sparse enabled    no
strip binaries    yes
profiler  no
static build  no
pixman            system
SDL support   yes (1.2.14)
GTK support   no 
GTK GL support    no
VTE support   no 
TLS priority  NORMAL
GNUTLS support    no
GNUTLS rnd        no
libgcrypt no
libgcrypt kdf no
nettle            no 
nettle kdf        no
libtasn1  no
curses support    no
virgl support no
curl support  no
mingw32 support   no
Audio drivers oss
Block whitelist (rw) 
Block whitelist (ro) 
VirtFS support    no
VNC support   yes
VNC SASL support  no
VNC JPEG support  no
VNC PNG support   no
xen support   no
brlapi support    no
bluez  support    no
Documentation no
PIE   yes
vde support   no
netmap support    no
Linux AIO support no
ATTR/XATTR support yes
Install blobs yes
KVM support   yes
RDMA support  no
TCG interpreter   no
fdt support   yes
preadv support    yes
fdatasync yes
madvise   yes
posix_madvise yes

Re: [Qemu-devel] [PATCH v8 2/6] vfio: VFIO based driver for Mediated devices

2016-10-11 Thread Kirti Wankhede


On 10/11/2016 9:25 AM, Alex Williamson wrote:
> On Tue, 11 Oct 2016 01:58:33 +0530
> Kirti Wankhede  wrote:
> 
>> vfio_mdev driver registers with mdev core driver.
>> MDEV core driver creates mediated device and calls probe routine of
>> vfio_mdev driver for each device.
>> Probe routine of vfio_mdev driver adds mediated device to VFIO core module
>>
>> This driver forms a shim layer that pass through VFIO devices operations
>> to vendor driver for mediated devices.
>>
>> Signed-off-by: Kirti Wankhede 
>> Signed-off-by: Neo Jia 
>> Change-Id: I583f4734752971d3d112324d69e2508c88f359ec
>> ---
>>  drivers/vfio/mdev/Kconfig   |   6 ++
>>  drivers/vfio/mdev/Makefile  |   1 +
>>  drivers/vfio/mdev/vfio_mdev.c   | 171 
>> 
>>  drivers/vfio/pci/vfio_pci_private.h |   6 +-
>>  4 files changed, 181 insertions(+), 3 deletions(-)
>>  create mode 100644 drivers/vfio/mdev/vfio_mdev.c
> 
> Looking pretty good so far, a few preliminary comments below.  Thanks,
> 
> Alex
> 

Thanks Alex.

I'm preparing next patch with your suggestions here. Also let us know if
you have any more comments.

Thanks,
Kirti



[Qemu-devel] [PATCH v6 35/35] target-alpha: Emulate LL/SC using cmpxchg helpers

2016-10-11 Thread Richard Henderson
Emulating LL/SC with cmpxchg is not correct, since it can
suffer from the ABA problem.  However, portable parallel
code is written assuming only cmpxchg which means that in
practice this is a viable alternative.

Signed-off-by: Richard Henderson 
---
 linux-user/main.c|  49 --
 target-alpha/cpu.h   |   4 --
 target-alpha/helper.c|   6 ---
 target-alpha/machine.c   |   2 -
 target-alpha/translate.c | 104 ---
 5 files changed, 45 insertions(+), 120 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 7055e54..bb48260 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2903,51 +2903,6 @@ void cpu_loop(CPUM68KState *env)
 #endif /* TARGET_M68K */
 
 #ifdef TARGET_ALPHA
-static void do_store_exclusive(CPUAlphaState *env, int reg, int quad)
-{
-target_ulong addr, val, tmp;
-target_siginfo_t info;
-int ret = 0;
-
-addr = env->lock_addr;
-tmp = env->lock_st_addr;
-env->lock_addr = -1;
-env->lock_st_addr = 0;
-
-start_exclusive();
-mmap_lock();
-
-if (addr == tmp) {
-if (quad ? get_user_s64(val, addr) : get_user_s32(val, addr)) {
-goto do_sigsegv;
-}
-
-if (val == env->lock_value) {
-tmp = env->ir[reg];
-if (quad ? put_user_u64(tmp, addr) : put_user_u32(tmp, addr)) {
-goto do_sigsegv;
-}
-ret = 1;
-}
-}
-env->ir[reg] = ret;
-env->pc += 4;
-
-mmap_unlock();
-end_exclusive();
-return;
-
- do_sigsegv:
-mmap_unlock();
-end_exclusive();
-
-info.si_signo = TARGET_SIGSEGV;
-info.si_errno = 0;
-info.si_code = TARGET_SEGV_MAPERR;
-info._sifields._sigfault._addr = addr;
-queue_signal(env, TARGET_SIGSEGV, QEMU_SI_FAULT, &info);
-}
-
 void cpu_loop(CPUAlphaState *env)
 {
 CPUState *cs = CPU(alpha_env_get_cpu(env));
@@ -3122,10 +3077,6 @@ void cpu_loop(CPUAlphaState *env)
 queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
 }
 break;
-case EXCP_STL_C:
-case EXCP_STQ_C:
-do_store_exclusive(env, env->error_code, trapnr - EXCP_STL_C);
-break;
 case EXCP_INTERRUPT:
 /* Just indicate that signals should be handled asap.  */
 break;
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index 871d9ba..b08d160 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -230,7 +230,6 @@ struct CPUAlphaState {
 uint64_t pc;
 uint64_t unique;
 uint64_t lock_addr;
-uint64_t lock_st_addr;
 uint64_t lock_value;
 
 /* The FPCR, and disassembled portions thereof.  */
@@ -346,9 +345,6 @@ enum {
 EXCP_ARITH,
 EXCP_FEN,
 EXCP_CALL_PAL,
-/* For Usermode emulation.  */
-EXCP_STL_C,
-EXCP_STQ_C,
 };
 
 /* Alpha-specific interrupt pending bits.  */
diff --git a/target-alpha/helper.c b/target-alpha/helper.c
index 9ba3e1a..2ef6cbe 100644
--- a/target-alpha/helper.c
+++ b/target-alpha/helper.c
@@ -306,12 +306,6 @@ void alpha_cpu_do_interrupt(CPUState *cs)
 case EXCP_CALL_PAL:
 name = "call_pal";
 break;
-case EXCP_STL_C:
-name = "stl_c";
-break;
-case EXCP_STQ_C:
-name = "stq_c";
-break;
 }
 qemu_log("INT %6d: %s(%#x) pc=%016" PRIx64 " sp=%016" PRIx64 "\n",
  ++count, name, env->error_code, env->pc, env->ir[IR_SP]);
diff --git a/target-alpha/machine.c b/target-alpha/machine.c
index 710b783..b99a123 100644
--- a/target-alpha/machine.c
+++ b/target-alpha/machine.c
@@ -45,8 +45,6 @@ static VMStateField vmstate_env_fields[] = {
 VMSTATE_UINTTL(unique, CPUAlphaState),
 VMSTATE_UINTTL(lock_addr, CPUAlphaState),
 VMSTATE_UINTTL(lock_value, CPUAlphaState),
-/* Note that lock_st_addr is not saved; it is a temporary
-   used during the execution of the st[lq]_c insns.  */
 
 VMSTATE_UINT8(ps, CPUAlphaState),
 VMSTATE_UINT8(intr_flag, CPUAlphaState),
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index a2e2a62..03e4776 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -99,7 +99,6 @@ static TCGv cpu_std_ir[31];
 static TCGv cpu_fir[31];
 static TCGv cpu_pc;
 static TCGv cpu_lock_addr;
-static TCGv cpu_lock_st_addr;
 static TCGv cpu_lock_value;
 
 #ifndef CONFIG_USER_ONLY
@@ -116,7 +115,6 @@ void alpha_translate_init(void)
 static const GlobalVar vars[] = {
 DEF_VAR(pc),
 DEF_VAR(lock_addr),
-DEF_VAR(lock_st_addr),
 DEF_VAR(lock_value),
 };
 
@@ -198,6 +196,23 @@ static TCGv dest_sink(DisasContext *ctx)
 return ctx->sink;
 }
 
+static void free_context_temps(DisasContext *ctx)
+{
+if (!TCGV_IS_UNUSED_I64(ctx->sink)) {
+tcg_gen_discard_i64(ctx->sink);
+tcg_temp_free(ctx->sink);
+TCGV_UNUSED_I64(ctx->sink);
+}
+if 

[Qemu-devel] [PATCH v6 34/35] target-alpha: Introduce MMU_PHYS_IDX

2016-10-11 Thread Richard Henderson
Rather than using helpers for physical accesses, use a mmu index.
The primary cleanup is with store-conditional on physical addresses.

Signed-off-by: Richard Henderson 
---
 target-alpha/cpu.h| 18 +---
 target-alpha/helper.c |  8 ++
 target-alpha/helper.h |  9 --
 target-alpha/mem_helper.c | 73 ---
 target-alpha/translate.c  | 50 ++--
 5 files changed, 44 insertions(+), 114 deletions(-)

diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index dcdd041..871d9ba 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -201,7 +201,7 @@ enum {
 
 /* MMU modes definitions */
 
-/* Alpha has 5 MMU modes: PALcode, kernel, executive, supervisor, and user.
+/* Alpha has 5 MMU modes: PALcode, Kernel, Executive, Supervisor, and User.
The Unix PALcode only exposes the kernel and user modes; presumably
executive and supervisor are used by VMS.
 
@@ -209,22 +209,18 @@ enum {
there are PALmode instructions that can access data via physical mode
or via an os-installed "alternate mode", which is one of the 4 above.
 
-   QEMU does not currently properly distinguish between code/data when
-   looking up addresses.  To avoid having to address this issue, our
-   emulated PALcode will cheat and use the KSEG mapping for its code+data
-   rather than physical addresses.
+   That said, we're only emulating Unix PALcode, and not attempting VMS,
+   so we don't need to implement Executive and Supervisor.  QEMU's own
+   PALcode cheats and uses the KSEG mapping for its code+data rather than
+   physical addresses.  */
 
-   Moreover, we're only emulating Unix PALcode, and not attempting VMS.
-
-   All of which allows us to drop all but kernel and user modes.
-   Elide the unused MMU modes to save space.  */
-
-#define NB_MMU_MODES 2
+#define NB_MMU_MODES 3
 
 #define MMU_MODE0_SUFFIX _kernel
 #define MMU_MODE1_SUFFIX _user
 #define MMU_KERNEL_IDX   0
 #define MMU_USER_IDX 1
+#define MMU_PHYS_IDX 2
 
 typedef struct CPUAlphaState CPUAlphaState;
 
diff --git a/target-alpha/helper.c b/target-alpha/helper.c
index 85168b7..9ba3e1a 100644
--- a/target-alpha/helper.c
+++ b/target-alpha/helper.c
@@ -126,6 +126,14 @@ static int get_physical_address(CPUAlphaState *env, 
target_ulong addr,
 int prot = 0;
 int ret = MM_K_ACV;
 
+/* Handle physical accesses.  */
+if (mmu_idx == MMU_PHYS_IDX) {
+phys = addr;
+prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ret = -1;
+goto exit;
+}
+
 /* Ensure that the virtual address is properly sign-extended from
the last implemented virtual address bit.  */
 if (saddr >> TARGET_VIRT_ADDR_SPACE_BITS != saddr >> 63) {
diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index c3d8a3e..004221d 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -92,15 +92,6 @@ DEF_HELPER_FLAGS_2(ieee_input_cmp, TCG_CALL_NO_WG, void, 
env, i64)
 DEF_HELPER_FLAGS_2(ieee_input_s, TCG_CALL_NO_WG, void, env, i64)
 
 #if !defined (CONFIG_USER_ONLY)
-DEF_HELPER_2(ldl_phys, i64, env, i64)
-DEF_HELPER_2(ldq_phys, i64, env, i64)
-DEF_HELPER_2(ldl_l_phys, i64, env, i64)
-DEF_HELPER_2(ldq_l_phys, i64, env, i64)
-DEF_HELPER_3(stl_phys, void, env, i64, i64)
-DEF_HELPER_3(stq_phys, void, env, i64, i64)
-DEF_HELPER_3(stl_c_phys, i64, env, i64, i64)
-DEF_HELPER_3(stq_c_phys, i64, env, i64, i64)
-
 DEF_HELPER_FLAGS_1(tbia, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_2(tbis, TCG_CALL_NO_RWG, void, env, i64)
 DEF_HELPER_FLAGS_1(tb_flush, TCG_CALL_NO_RWG, void, env)
diff --git a/target-alpha/mem_helper.c b/target-alpha/mem_helper.c
index 1b2be50..78a7d45 100644
--- a/target-alpha/mem_helper.c
+++ b/target-alpha/mem_helper.c
@@ -25,79 +25,6 @@
 
 /* Softmmu support */
 #ifndef CONFIG_USER_ONLY
-
-uint64_t helper_ldl_phys(CPUAlphaState *env, uint64_t p)
-{
-CPUState *cs = CPU(alpha_env_get_cpu(env));
-return (int32_t)ldl_phys(cs->as, p);
-}
-
-uint64_t helper_ldq_phys(CPUAlphaState *env, uint64_t p)
-{
-CPUState *cs = CPU(alpha_env_get_cpu(env));
-return ldq_phys(cs->as, p);
-}
-
-uint64_t helper_ldl_l_phys(CPUAlphaState *env, uint64_t p)
-{
-CPUState *cs = CPU(alpha_env_get_cpu(env));
-env->lock_addr = p;
-return env->lock_value = (int32_t)ldl_phys(cs->as, p);
-}
-
-uint64_t helper_ldq_l_phys(CPUAlphaState *env, uint64_t p)
-{
-CPUState *cs = CPU(alpha_env_get_cpu(env));
-env->lock_addr = p;
-return env->lock_value = ldq_phys(cs->as, p);
-}
-
-void helper_stl_phys(CPUAlphaState *env, uint64_t p, uint64_t v)
-{
-CPUState *cs = CPU(alpha_env_get_cpu(env));
-stl_phys(cs->as, p, v);
-}
-
-void helper_stq_phys(CPUAlphaState *env, uint64_t p, uint64_t v)
-{
-CPUState *cs = CPU(alpha_env_get_cpu(env));
-stq_phys(cs->as, p, v);
-}
-
-uint64_t helper_stl_c_phys(CPUAlphaState *env, uint64_t p, uint64_t v)
-{
-CPUState *cs = CPU(alpha_env_get_cpu(env));
-uint64_t 

[Qemu-devel] [PATCH v6 30/35] target-arm: emulate aarch64's LL/SC using cmpxchg helpers

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

Emulating LL/SC with cmpxchg is not correct, since it can
suffer from the ABA problem. Portable parallel code, however,
is written assuming only cmpxchg--and not LL/SC--is available.
This means that in practice emulating LL/SC with cmpxchg is
a viable alternative.

The appended patch emulates LL/SC pairs in aarch64 with cmpxchg helpers.
This works in both user and system mode. In usermode, it avoids
pausing all other CPUs to perform the LL/SC pair. The subsequent
performance and scalability improvement is significant, as the
plots below show. They plot the throughput of atomic_add-bench
compiled for ARM and executed on a 64-core x86 machine.

Hi-res plots: http://imgur.com/a/JVc8Y

atomic_add-bench: 100 ops/thread, [0,1] range

  18 ++-+--+-+--+--+--+---++
 +cmpxchg +-E--+   + +  +  +  +|
  16 ++master +-H--+  ++
 |||
  14 ++   ++
 | |   |
  12 ++|  ++
 | |   |
  10  ++
   8 ++E  ++
 |+++  |
   6 ++ | ++
 |  |  |
   4 ++ | ++
 |   | |
   2 +H++E+---++
 + | +E+++E+---+--+E+++E+--+E+--+E+++E+---+--+E|
   0 ++H-HH-+-H+-+--+--+--+---++
 0  10 2030 40 50 60
Number of threads

atomic_add-bench: 100 ops/thread, [0,2] range

  18 ++-+--+-+--+--+--+---++
 +cmpxchg +-E--+   + +  +  +  +|
  16 ++master +-H--+  ++
 | |   |
  14 ++E  ++
 | |   |
  12 ++|  ++
 |+++  |
  10 ++ | ++
   8 ++ | ++
 |  |  |
   6 ++ | ++
 |   | |
   4 ++  |++
 |  +E+--- |
   2 +H+ +E+-+++  +++  +++   ---+E+-+E+--+++
 +++++E+---+--+E+++E+--+E+---   +++   +  +E|
   0 ++H-HH-+-H+-+--+--+--+---++
 0  10 2030 40 50 60
Number of threads

   atomic_add-bench: 100 ops/thread, [0,128] range

  70 ++-+--+-+--+--+--+---++
 +cmpxchg +-E--+   + +  +  +  +|
  60 ++master +-H--+  +++---+E+-+E+--+E+
 |+E+--E---+E+---  |
 | ---+++  |
  50 ++  +++---   ++
 |  -+E+   |
  40 ++  +++  ++
 |E-   |
 |  --||
  30 ++   -- +++  ++
 |  +E+|
  20 ++E+ ++
 |E+  

Re: [Qemu-devel] [PATCH v3 0/7] Runtime pagesize computation

2016-10-11 Thread Peter Maydell
On 11 October 2016 at 12:20, Richard Henderson  wrote:
> On 10/11/2016 12:08 PM, Peter Maydell wrote:
>> I would ideally have liked to finalize things much later, but
>> this is in practice hugely difficult because so many things
>> (in particular all the address space/memory system code)
>> assume the target page size is known.

> Unfortunate.  I suppose that 4k is still better than 1k, but
> I was hoping to get 16k or 64k (or higher) when the OS is
> configured to use such.  I.e. totally dynamically configurable
> upon write to the appropriate cpu register.

I think that would run into problems with migration:
the migration stream all works in guest-pages of ram and
a mismatch means migration doesn't work.

> Given how the memory subsystem already dynamically reconfigures
> itself for changes in address_space topology, I assumed page size
> changes would be trivial and fall out naturally.

The trouble is that all the data structures work in terms
of page sizes (even though we support sub-page allocations
those are still done by carving up a page-size chunk).
It could probably be done but it looked like a gargantuan
task so I decided this was a better compromise.

thanks
-- PMM



[Qemu-devel] [PATCH v6 31/35] linux-user: remove handling of ARM's EXCP_STREX

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

The exception is not emitted anymore.

Reviewed-by: Alex Bennée 
Signed-off-by: Emilio G. Cota 
Signed-off-by: Richard Henderson 
Message-Id: <1467054136-10430-29-git-send-email-c...@braap.org>
---
 linux-user/main.c | 93 ---
 1 file changed, 93 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index ae68672..4530e60 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -553,94 +553,6 @@ do_kernel_trap(CPUARMState *env)
 return 0;
 }
 
-/* Store exclusive handling for AArch32 */
-static int do_strex(CPUARMState *env)
-{
-uint64_t val;
-int size;
-int rc = 1;
-int segv = 0;
-uint32_t addr;
-start_exclusive();
-if (env->exclusive_addr != env->exclusive_test) {
-goto fail;
-}
-/* We know we're always AArch32 so the address is in uint32_t range
- * unless it was the -1 exclusive-monitor-lost value (which won't
- * match exclusive_test above).
- */
-assert(extract64(env->exclusive_addr, 32, 32) == 0);
-addr = env->exclusive_addr;
-size = env->exclusive_info & 0xf;
-switch (size) {
-case 0:
-segv = get_user_u8(val, addr);
-break;
-case 1:
-segv = get_user_data_u16(val, addr, env);
-break;
-case 2:
-case 3:
-segv = get_user_data_u32(val, addr, env);
-break;
-default:
-abort();
-}
-if (segv) {
-env->exception.vaddress = addr;
-goto done;
-}
-if (size == 3) {
-uint32_t valhi;
-segv = get_user_data_u32(valhi, addr + 4, env);
-if (segv) {
-env->exception.vaddress = addr + 4;
-goto done;
-}
-if (arm_cpu_bswap_data(env)) {
-val = deposit64((uint64_t)valhi, 32, 32, val);
-} else {
-val = deposit64(val, 32, 32, valhi);
-}
-}
-if (val != env->exclusive_val) {
-goto fail;
-}
-
-val = env->regs[(env->exclusive_info >> 8) & 0xf];
-switch (size) {
-case 0:
-segv = put_user_u8(val, addr);
-break;
-case 1:
-segv = put_user_data_u16(val, addr, env);
-break;
-case 2:
-case 3:
-segv = put_user_data_u32(val, addr, env);
-break;
-}
-if (segv) {
-env->exception.vaddress = addr;
-goto done;
-}
-if (size == 3) {
-val = env->regs[(env->exclusive_info >> 12) & 0xf];
-segv = put_user_data_u32(val, addr + 4, env);
-if (segv) {
-env->exception.vaddress = addr + 4;
-goto done;
-}
-}
-rc = 0;
-fail:
-env->regs[15] += 4;
-env->regs[(env->exclusive_info >> 4) & 0xf] = rc;
-done:
-end_exclusive();
-return segv;
-}
-
 void cpu_loop(CPUARMState *env)
 {
 CPUState *cs = CPU(arm_env_get_cpu(env));
@@ -812,11 +724,6 @@ void cpu_loop(CPUARMState *env)
 case EXCP_INTERRUPT:
 /* just indicate that signals should be handled asap */
 break;
-case EXCP_STREX:
-if (!do_strex(env)) {
-break;
-}
-/* fall through for segv */
 case EXCP_PREFETCH_ABORT:
 case EXCP_DATA_ABORT:
 addr = env->exception.vaddress;
-- 
2.7.4




Re: [Qemu-devel] [PATCH v8 1/6] vfio: Mediated device Core driver

2016-10-11 Thread Kirti Wankhede


On 10/11/2016 9:21 AM, Alex Williamson wrote:
> On Tue, 11 Oct 2016 01:58:32 +0530
> Kirti Wankhede  wrote:
>> ---
>>  drivers/vfio/Kconfig |   1 +
>>  drivers/vfio/Makefile|   1 +
>>  drivers/vfio/mdev/Kconfig|  12 ++
>>  drivers/vfio/mdev/Makefile   |   5 +
>>  drivers/vfio/mdev/mdev_core.c| 363 
>> +++
>>  drivers/vfio/mdev/mdev_driver.c  | 131 ++
>>  drivers/vfio/mdev/mdev_private.h |  41 +
>>  drivers/vfio/mdev/mdev_sysfs.c   | 295 +++
>>  include/linux/mdev.h | 178 +++
>>  9 files changed, 1027 insertions(+)
>>  create mode 100644 drivers/vfio/mdev/Kconfig
>>  create mode 100644 drivers/vfio/mdev/Makefile
>>  create mode 100644 drivers/vfio/mdev/mdev_core.c
>>  create mode 100644 drivers/vfio/mdev/mdev_driver.c
>>  create mode 100644 drivers/vfio/mdev/mdev_private.h
>>  create mode 100644 drivers/vfio/mdev/mdev_sysfs.c
>>  create mode 100644 include/linux/mdev.h
> 
> 
> Overall this is heading in a good direction.  What kernel is this
> series against?  I could only apply it to v4.7, yet some of the
> dependencies claimed in the cover letter are only in v4.8.  linux-next
> or v4.8 are both good baselines right now, as we move to v4.9-rc
> releases, linux-next probably becomes a better target.
> 

Thanks Alex.

Yes, this series is against kernel v4.7. Patches 1-5 apply cleanly to
linux-next; patch 6/6 shows conflicts against linux-next.

I'm preparing next version of this patch set against linux-next.

Thanks,
Kirti.


> A few initial comments below, I'll likely have more as I wrap my head
> around it.  Thanks,
> 
> Alex
> 



[Qemu-devel] [PATCH v6 32/35] linux-user: remove handling of aarch64's EXCP_STREX

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

The exception is not emitted anymore.

Reviewed-by: Alex Bennée 
Signed-off-by: Emilio G. Cota 
Signed-off-by: Richard Henderson 
Message-Id: <1467054136-10430-30-git-send-email-c...@braap.org>
---
 linux-user/main.c | 125 --
 1 file changed, 125 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 4530e60..7055e54 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -772,124 +772,6 @@ void cpu_loop(CPUARMState *env)
 
 #else
 
-/*
- * Handle AArch64 store-release exclusive
- *
- * rs = gets the status result of store exclusive
- * rt = is the register that is stored
- * rt2 = is the second register store (in STP)
- *
- */
-static int do_strex_a64(CPUARMState *env)
-{
-uint64_t val;
-int size;
-bool is_pair;
-int rc = 1;
-int segv = 0;
-uint64_t addr;
-int rs, rt, rt2;
-
-start_exclusive();
-/* size | is_pair << 2 | (rs << 4) | (rt << 9) | (rt2 << 14)); */
-size = extract32(env->exclusive_info, 0, 2);
-is_pair = extract32(env->exclusive_info, 2, 1);
-rs = extract32(env->exclusive_info, 4, 5);
-rt = extract32(env->exclusive_info, 9, 5);
-rt2 = extract32(env->exclusive_info, 14, 5);
-
-addr = env->exclusive_addr;
-
-if (addr != env->exclusive_test) {
-goto finish;
-}
-
-switch (size) {
-case 0:
-segv = get_user_u8(val, addr);
-break;
-case 1:
-segv = get_user_u16(val, addr);
-break;
-case 2:
-segv = get_user_u32(val, addr);
-break;
-case 3:
-segv = get_user_u64(val, addr);
-break;
-default:
-abort();
-}
-if (segv) {
-env->exception.vaddress = addr;
-goto error;
-}
-if (val != env->exclusive_val) {
-goto finish;
-}
-if (is_pair) {
-if (size == 2) {
-segv = get_user_u32(val, addr + 4);
-} else {
-segv = get_user_u64(val, addr + 8);
-}
-if (segv) {
-env->exception.vaddress = addr + (size == 2 ? 4 : 8);
-goto error;
-}
-if (val != env->exclusive_high) {
-goto finish;
-}
-}
-/* handle the zero register */
-val = rt == 31 ? 0 : env->xregs[rt];
-switch (size) {
-case 0:
-segv = put_user_u8(val, addr);
-break;
-case 1:
-segv = put_user_u16(val, addr);
-break;
-case 2:
-segv = put_user_u32(val, addr);
-break;
-case 3:
-segv = put_user_u64(val, addr);
-break;
-}
-if (segv) {
-goto error;
-}
-if (is_pair) {
-/* handle the zero register */
-val = rt2 == 31 ? 0 : env->xregs[rt2];
-if (size == 2) {
-segv = put_user_u32(val, addr + 4);
-} else {
-segv = put_user_u64(val, addr + 8);
-}
-if (segv) {
-env->exception.vaddress = addr + (size == 2 ? 4 : 8);
-goto error;
-}
-}
-rc = 0;
-finish:
-env->pc += 4;
-/* rs == 31 encodes a write to the ZR, thus throwing away
- * the status return. This is rather silly but valid.
- */
-if (rs < 31) {
-env->xregs[rs] = rc;
-}
-error:
-/* instruction faulted, PC does not advance */
-/* either way a strex releases any exclusive lock we have */
-env->exclusive_addr = -1;
-end_exclusive();
-return segv;
-}
-
 /* AArch64 main loop */
 void cpu_loop(CPUARMState *env)
 {
@@ -931,11 +813,6 @@ void cpu_loop(CPUARMState *env)
 info._sifields._sigfault._addr = env->pc;
 queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
 break;
-case EXCP_STREX:
-if (!do_strex_a64(env)) {
-break;
-}
-/* fall through for segv */
 case EXCP_PREFETCH_ABORT:
 case EXCP_DATA_ABORT:
 info.si_signo = TARGET_SIGSEGV;
@@ -971,8 +848,6 @@ void cpu_loop(CPUARMState *env)
 process_pending_signals(env);
 /* Exception return on AArch64 always clears the exclusive monitor,
  * so any return to running guest code implies this.
- * A strex (successful or otherwise) also clears the monitor, so
- * we don't need to specialcase EXCP_STREX.
  */
 env->exclusive_addr = -1;
 }
-- 
2.7.4




[Qemu-devel] [PATCH v6 26/35] tests: add atomic_add-bench

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

With this microbenchmark we can measure the overhead of emulating atomic
instructions with a configurable degree of contention.

The benchmark spawns $n threads, each performing $o atomic ops (additions)
in a loop. Each atomic operation is performed on a different cache line
(assuming lines are 64 bytes long) that is randomly selected from a range [0, $r).

[ Note: each $foo corresponds to a -foo flag ]

Signed-off-by: Emilio G. Cota 
Signed-off-by: Richard Henderson 
Message-Id: <1467054136-10430-20-git-send-email-c...@braap.org>
---
 tests/.gitignore |   1 +
 tests/Makefile.include   |   4 +-
 tests/atomic_add-bench.c | 163 +++
 3 files changed, 167 insertions(+), 1 deletion(-)
 create mode 100644 tests/atomic_add-bench.c

diff --git a/tests/.gitignore b/tests/.gitignore
index 0f0c79b..ea379b4 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -1,3 +1,4 @@
+atomic_add-bench
 check-qdict
 check-qfloat
 check-qint
diff --git a/tests/Makefile.include b/tests/Makefile.include
index a7c..177661e 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -454,7 +454,8 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o 
tests/check-qdict.o \
tests/test-opts-visitor.o tests/test-qmp-event.o \
tests/rcutorture.o tests/test-rcu-list.o \
tests/test-qdist.o \
-   tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o
+   tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o \
+   tests/atomic_add-bench.o
 
 $(test-obj-y): QEMU_INCLUDES += -Itests
 QEMU_CFLAGS += -I$(SRC_PATH)/tests
@@ -499,6 +500,7 @@ tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y)
 tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) 
$(test-util-obj-y)
 tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y)
 tests/test-bufferiszero$(EXESUF): tests/test-bufferiszero.o $(test-util-obj-y)
+tests/atomic_add-bench$(EXESUF): tests/atomic_add-bench.o $(test-util-obj-y)
 
 tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\
diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
new file mode 100644
index 000..69c59ad
--- /dev/null
+++ b/tests/atomic_add-bench.c
@@ -0,0 +1,163 @@
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/host-utils.h"
+#include "qemu/processor.h"
+
+struct thread_info {
+uint64_t r;
+} QEMU_ALIGNED(64);
+
+struct count {
+unsigned long val;
+} QEMU_ALIGNED(64);
+
+static QemuThread *threads;
+static struct thread_info *th_info;
+static unsigned int n_threads = 1;
+static unsigned int n_ready_threads;
+static struct count *counts;
+static unsigned int duration = 1;
+static unsigned int range = 1024;
+static bool test_start;
+static bool test_stop;
+
+static const char commands_string[] =
+" -n = number of threads\n"
+" -d = duration in seconds\n"
+" -r = range (will be rounded up to pow2)";
+
+static void usage_complete(char *argv[])
+{
+fprintf(stderr, "Usage: %s [options]\n", argv[0]);
+fprintf(stderr, "options:\n%s\n", commands_string);
+}
+
+/*
+ * From: https://en.wikipedia.org/wiki/Xorshift
+ * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
+ * guaranteed to be >= INT_MAX).
+ */
+static uint64_t xorshift64star(uint64_t x)
+{
+x ^= x >> 12; /* a */
+x ^= x << 25; /* b */
+x ^= x >> 27; /* c */
+return x * UINT64_C(2685821657736338717);
+}
+
+static void *thread_func(void *arg)
+{
+struct thread_info *info = arg;
+
+atomic_inc(_ready_threads);
+while (!atomic_mb_read(_start)) {
+cpu_relax();
+}
+
+while (!atomic_read(_stop)) {
+unsigned int index;
+
+info->r = xorshift64star(info->r);
+index = info->r & (range - 1);
+atomic_inc([index].val);
+}
+return NULL;
+}
+
+static void run_test(void)
+{
+unsigned int remaining;
+unsigned int i;
+
+while (atomic_read(_ready_threads) != n_threads) {
+cpu_relax();
+}
+atomic_mb_set(_start, true);
+do {
+remaining = sleep(duration);
+} while (remaining);
+atomic_mb_set(_stop, true);
+
+for (i = 0; i < n_threads; i++) {
+qemu_thread_join([i]);
+}
+}
+
+static void create_threads(void)
+{
+unsigned int i;
+
+threads = g_new(QemuThread, n_threads);
+th_info = g_new(struct thread_info, n_threads);
+counts = qemu_memalign(64, sizeof(*counts) * range);
+memset(counts, 0, sizeof(*counts) * range);
+
+for (i = 0; i < n_threads; i++) {
+struct thread_info *info = _info[i];
+
+info->r = (i + 1) ^ time(NULL);
+qemu_thread_create([i], NULL, thread_func, info,
+   QEMU_THREAD_JOINABLE);
+}
+}
+
+static void pr_params(void)
+{
+printf("Parameters:\n");
+printf(" # of threads:  

[Qemu-devel] [PATCH v6 33/35] target-arm: remove EXCP_STREX + cpu_exclusive_{test, info}

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

The exception is not emitted anymore; remove it and the associated
TCG variables.

Reviewed-by: Alex Bennée 
Signed-off-by: Emilio G. Cota 
Signed-off-by: Richard Henderson 
Message-Id: <1467054136-10430-31-git-send-email-c...@braap.org>
---
 target-arm/cpu.h   | 17 ++---
 target-arm/internals.h |  4 +---
 target-arm/translate.c | 10 --
 target-arm/translate.h |  4 
 4 files changed, 7 insertions(+), 28 deletions(-)

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 76d824d..a38cec0 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -46,13 +46,12 @@
 #define EXCP_BKPT7
 #define EXCP_EXCEPTION_EXIT  8   /* Return from v7M exception.  */
 #define EXCP_KERNEL_TRAP 9   /* Jumped to kernel code page.  */
-#define EXCP_STREX  10
-#define EXCP_HVC11   /* HyperVisor Call */
-#define EXCP_HYP_TRAP   12
-#define EXCP_SMC13   /* Secure Monitor Call */
-#define EXCP_VIRQ   14
-#define EXCP_VFIQ   15
-#define EXCP_SEMIHOST   16   /* semihosting call (A64 only) */
+#define EXCP_HVC10   /* HyperVisor Call */
+#define EXCP_HYP_TRAP   11
+#define EXCP_SMC12   /* Secure Monitor Call */
+#define EXCP_VIRQ   13
+#define EXCP_VFIQ   14
+#define EXCP_SEMIHOST   15   /* semihosting call (A64 only) */
 
 #define ARMV7M_EXCP_RESET   1
 #define ARMV7M_EXCP_NMI 2
@@ -475,10 +474,6 @@ typedef struct CPUARMState {
 uint64_t exclusive_addr;
 uint64_t exclusive_val;
 uint64_t exclusive_high;
-#if defined(CONFIG_USER_ONLY)
-uint64_t exclusive_test;
-uint32_t exclusive_info;
-#endif
 
 /* iwMMXt coprocessor state.  */
 struct {
diff --git a/target-arm/internals.h b/target-arm/internals.h
index cd57401..3edccd2 100644
--- a/target-arm/internals.h
+++ b/target-arm/internals.h
@@ -46,8 +46,7 @@ static inline bool excp_is_internal(int excp)
 || excp == EXCP_HALTED
 || excp == EXCP_EXCEPTION_EXIT
 || excp == EXCP_KERNEL_TRAP
-|| excp == EXCP_SEMIHOST
-|| excp == EXCP_STREX;
+|| excp == EXCP_SEMIHOST;
 }
 
 /* Exception names for debug logging; note that not all of these
@@ -63,7 +62,6 @@ static const char * const excnames[] = {
 [EXCP_BKPT] = "Breakpoint",
 [EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit",
 [EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage",
-[EXCP_STREX] = "QEMU intercept of STREX",
 [EXCP_HVC] = "Hypervisor Call",
 [EXCP_HYP_TRAP] = "Hypervisor Trap",
 [EXCP_SMC] = "Secure Monitor Call",
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 7048cb3..5e21b52 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -64,10 +64,6 @@ static TCGv_i32 cpu_R[16];
 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
 TCGv_i64 cpu_exclusive_addr;
 TCGv_i64 cpu_exclusive_val;
-#ifdef CONFIG_USER_ONLY
-TCGv_i64 cpu_exclusive_test;
-TCGv_i32 cpu_exclusive_info;
-#endif
 
 /* FIXME:  These should be removed.  */
 static TCGv_i32 cpu_F0s, cpu_F1s;
@@ -101,12 +97,6 @@ void arm_translate_init(void)
 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
 offsetof(CPUARMState, exclusive_val), "exclusive_val");
-#ifdef CONFIG_USER_ONLY
-cpu_exclusive_test = tcg_global_mem_new_i64(cpu_env,
-offsetof(CPUARMState, exclusive_test), "exclusive_test");
-cpu_exclusive_info = tcg_global_mem_new_i32(cpu_env,
-offsetof(CPUARMState, exclusive_info), "exclusive_info");
-#endif
 
 a64_translate_init();
 }
diff --git a/target-arm/translate.h b/target-arm/translate.h
index dbd7ac8..d4e205e 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -77,10 +77,6 @@ extern TCGv_env cpu_env;
 extern TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
 extern TCGv_i64 cpu_exclusive_addr;
 extern TCGv_i64 cpu_exclusive_val;
-#ifdef CONFIG_USER_ONLY
-extern TCGv_i64 cpu_exclusive_test;
-extern TCGv_i32 cpu_exclusive_info;
-#endif
 
 static inline int arm_dc_feature(DisasContext *dc, int feature)
 {
-- 
2.7.4




[Qemu-devel] [PATCH v6 24/35] target-i386: emulate XCHG using atomic helper

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-19-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index e781869..c8827f3 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -5564,12 +5564,8 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 gen_lea_modrm(env, s, modrm);
 gen_op_mov_v_reg(ot, cpu_T0, reg);
 /* for xchg, lock is implicit */
-if (!(prefixes & PREFIX_LOCK))
-gen_helper_lock();
-gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
-gen_op_st_v(s, ot, cpu_T0, cpu_A0);
-if (!(prefixes & PREFIX_LOCK))
-gen_helper_unlock();
+tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0,
+   s->mem_index, ot | MO_LE);
 gen_op_mov_reg_v(ot, reg, cpu_T1);
 }
 break;
-- 
2.7.4




[Qemu-devel] [PATCH v6 25/35] target-i386: remove helper_lock()

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

It's been superseded by the atomic helpers.

The use of the atomic helpers provides a significant performance and scalability
improvement. Below is the result of running the atomic_add-bench microbenchmark
with:
 $ x86_64-linux-user/qemu-x86_64 tests/atomic_add-bench -o 500 -r $r -n $n
, where $n is the number of threads and $r is the allowed range for the 
additions.

The scenarios measured are:
- atomic: implements x86's ADDL with the atomic_add helper (i.e. this patchset)
- cmpxchg: implements x86's ADDL with a TCG loop using the cmpxchg helper
- master: before this patchset

Results sorted in ascending range, i.e. descending degree of contention.
Y axis is Throughput in Mops/s. Tests are run on an AMD machine with 64
Opteron 6376 cores.

atomic_add-bench: 500 ops/thread, [0,1] range

  25 ++-+--+-+--+--+--+---++
 + atomic +-E--+   + +  +  +  +|
 |cmpxchg +-H--+   |
  20 +Emaster +-N--+  ++
 |||
 |++   |
 |||
  15 +++  ++
 |N|   |
 |+|   |
  10 ++|  ++
 |+|+  |
 | |-+E+--+++  ---+E+--+E+--+E+-+E+--+E|
 |+E+E+- +++ +E+--+E+--|
   5 ++|+ ++
 |+N+H+--- +++ |
 N+--+H+++++   +  +++  --++H+--+H+--+H+++H+---+--- |
   0 ++-+-H+---H-+--+--+--+---H+
 0  10 2030 40 50 60
Number of threads

atomic_add-bench: 500 ops/thread, [0,2] range

  25 ++-+--+-+--+--+--+---++
 ++atomic +-E--+   + +  +  +  +|
 |cmpxchg +-H--+   |
  20 ++master +-N--+  ++
 |E|   |
 |++   |
 ||E   |
  15 ++|  ++
 |N||  |
 |+||   ---+E+--+E+-+E+--+E|
  10 ++| |---+E+--+E+-+E+---+++  +++
 ||H+E+--+E+-- |
 |+|
 | ||  |
   5 ++|+H+--  +++++
 |+N+-  ---+H+--+H+--  |
 +  +N+--+H+++H+---+--+H+++H+---+  ++H+---+--+H|
   0 ++-+--+-+--+--+--+---++
 0  10 2030 40 50 60
Number of threads

atomic_add-bench: 500 ops/thread, [0,8] range

  40 ++-+--+-+--+--+--+---++
 ++atomic +-E--+   + +  +  +  +|
  35 +cmpxchg +-H--+  ++
 | master +-N--+   ---+E+--+E+--+E+-+E+--+E|
  30 ++|   ---+E+--   +++ ++
 | |-+E+---|
  25 ++E +++  ++
 |+ -+E+   |
  20 +E+ E-- +++  ++
 |H|+++|
 |+|   +H+---  |
  15 ++H+   ---+++  +H+-- ++
 |N++H+-- +++---

[Qemu-devel] [PATCH v6 29/35] target-arm: emulate SWP with atomic_xchg helper

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-25-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-arm/translate.c | 26 ++
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index e0e29d9..7048cb3 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -8752,25 +8752,27 @@ static void disas_arm_insn(DisasContext *s, unsigned 
int insn)
 }
 tcg_temp_free_i32(addr);
 } else {
+TCGv taddr;
+TCGMemOp opc = s->be_data;
+
 /* SWP instruction */
 rm = (insn) & 0xf;
 
-/* ??? This is not really atomic.  However we know
-   we never have multiple CPUs running in parallel,
-   so it is good enough.  */
-addr = load_reg(s, rn);
-tmp = load_reg(s, rm);
-tmp2 = tcg_temp_new_i32();
 if (insn & (1 << 22)) {
-gen_aa32_ld8u(s, tmp2, addr, get_mem_index(s));
-gen_aa32_st8(s, tmp, addr, get_mem_index(s));
+opc |= MO_UB;
 } else {
-gen_aa32_ld32u(s, tmp2, addr, get_mem_index(s));
-gen_aa32_st32(s, tmp, addr, get_mem_index(s));
+opc |= MO_UL | MO_ALIGN;
 }
-tcg_temp_free_i32(tmp);
+
+addr = load_reg(s, rn);
+taddr = gen_aa32_addr(s, addr, opc);
 tcg_temp_free_i32(addr);
-store_reg(s, rd, tmp2);
+
+tmp = load_reg(s, rm);
+tcg_gen_atomic_xchg_i32(tmp, taddr, tmp,
+get_mem_index(s), opc);
+tcg_temp_free(taddr);
+store_reg(s, rd, tmp);
 }
 }
 } else {
-- 
2.7.4




[Qemu-devel] [PATCH v6 23/35] target-i386: emulate LOCK'ed BTX ops using atomic helpers

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

[rth: Avoid redundant qemu_ld in locked case.  Fix previously unnoticed
incorrect zero-extension of address in register-offset case.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-18-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 87 -
 1 file changed, 57 insertions(+), 30 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 049b1e4..e781869 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -6655,7 +6655,9 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 if (mod != 3) {
 s->rip_offset = 1;
 gen_lea_modrm(env, s, modrm);
-gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+if (!(s->prefix & PREFIX_LOCK)) {
+gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+}
 } else {
 gen_op_mov_v_reg(ot, cpu_T0, rm);
 }
@@ -6685,44 +6687,69 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 rm = (modrm & 7) | REX_B(s);
 gen_op_mov_v_reg(MO_32, cpu_T1, reg);
 if (mod != 3) {
-gen_lea_modrm(env, s, modrm);
+AddressParts a = gen_lea_modrm_0(env, s, modrm);
 /* specific case: we need to add a displacement */
 gen_exts(ot, cpu_T1);
 tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
 tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
-tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
-gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0);
+gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
+if (!(s->prefix & PREFIX_LOCK)) {
+gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+}
 } else {
 gen_op_mov_v_reg(ot, cpu_T0, rm);
 }
 bt_op:
 tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
-tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
-switch(op) {
-case 0:
-break;
-case 1:
-tcg_gen_movi_tl(cpu_tmp0, 1);
-tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
-tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
-break;
-case 2:
-tcg_gen_movi_tl(cpu_tmp0, 1);
-tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
-tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
-break;
-default:
-case 3:
-tcg_gen_movi_tl(cpu_tmp0, 1);
-tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
-tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
-break;
-}
-if (op != 0) {
-if (mod != 3) {
-gen_op_st_v(s, ot, cpu_T0, cpu_A0);
-} else {
-gen_op_mov_reg_v(ot, rm, cpu_T0);
+tcg_gen_movi_tl(cpu_tmp0, 1);
+tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
+if (s->prefix & PREFIX_LOCK) {
+switch (op) {
+case 0: /* bt */
+/* Needs no atomic ops; we suppressed the normal
+   memory load for LOCK above so do it now.  */
+gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+break;
+case 1: /* bts */
+tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0,
+   s->mem_index, ot | MO_LE);
+break;
+case 2: /* btr */
+tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
+tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0,
+s->mem_index, ot | MO_LE);
+break;
+default:
+case 3: /* btc */
+tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0,
+s->mem_index, ot | MO_LE);
+break;
+}
+tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
+} else {
+tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
+switch (op) {
+case 0: /* bt */
+/* Data already loaded; nothing to do.  */
+break;
+case 1: /* bts */
+tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
+break;
+case 2: /* btr */
+tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
+break;
+default:
+case 3: /* btc */
+tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
+break;
+}
+if (op != 0) {
+if (mod != 3) {
+gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+} else {
+gen_op_mov_reg_v(ot, rm, cpu_T0);
+}
 }
 }
 
-- 
2.7.4




[Qemu-devel] [PATCH v6 21/35] target-i386: emulate LOCK'ed NEG using cmpxchg helper

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

[rth: Move redundant qemu_load out of cmpxchg loop.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-16-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 38 ++
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 49455a3..17a37a3 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4713,11 +4713,41 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 }
 break;
 case 3: /* neg */
-tcg_gen_neg_tl(cpu_T0, cpu_T0);
-if (mod != 3) {
-gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+if (s->prefix & PREFIX_LOCK) {
+TCGLabel *label1;
+TCGv a0, t0, t1, t2;
+
+if (mod == 3) {
+goto illegal_op;
+}
+a0 = tcg_temp_local_new();
+t0 = tcg_temp_local_new();
+label1 = gen_new_label();
+
+tcg_gen_mov_tl(a0, cpu_A0);
+tcg_gen_mov_tl(t0, cpu_T0);
+
+gen_set_label(label1);
+t1 = tcg_temp_new();
+t2 = tcg_temp_new();
+tcg_gen_mov_tl(t2, t0);
+tcg_gen_neg_tl(t1, t0);
+tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
+  s->mem_index, ot | MO_LE);
+tcg_temp_free(t1);
+tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
+
+tcg_temp_free(t2);
+tcg_temp_free(a0);
+tcg_gen_mov_tl(cpu_T0, t0);
+tcg_temp_free(t0);
 } else {
-gen_op_mov_reg_v(ot, rm, cpu_T0);
+tcg_gen_neg_tl(cpu_T0, cpu_T0);
+if (mod != 3) {
+gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+} else {
+gen_op_mov_reg_v(ot, rm, cpu_T0);
+}
 }
 gen_op_update_neg_cc();
 set_cc_op(s, CC_OP_SUBB + ot);
-- 
2.7.4




[Qemu-devel] [PATCH v6 28/35] target-arm: emulate LL/SC using cmpxchg helpers

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

Emulating LL/SC with cmpxchg is not correct, since it can
suffer from the ABA problem. Portable parallel code, however,
is written assuming only cmpxchg--and not LL/SC--is available.
This means that in practice emulating LL/SC with cmpxchg is
a viable alternative.

The appended patch emulates LL/SC pairs in ARM with cmpxchg helpers.
This works in both user and system mode. In usermode, it avoids
pausing all other CPUs to perform the LL/SC pair. The subsequent
performance and scalability improvement is significant, as the
plots below show. They plot the throughput of atomic_add-bench
compiled for ARM and executed on a 64-core x86 machine.

Hi-res plots: http://imgur.com/a/aNQpB

   atomic_add-bench: 100 ops/thread, [0,1] range

  9 ++-+--+--+--+--+--+---++
+cmpxchg +-E--+   +  +  +  +  +|
  8 +Emaster +-H--+   ++
| ||
  7 ++E   ++
| ||
  6   ++
|  |   |
  5 ++ |  ++
  4 ++ |  ++
|  |   |
  3 ++ |  ++
|   |  |
  2 ++  | ++
|H++E+---  +++  ---+E+--+E+--+E|
  1 +++ +E+-+E+--+E+--+E+--+E+--   +++  +++   ++
++H+   ++++   +  +++    +  +  +|
  0 ++--HH-+-H+--+--+--+--+---++
0  10 20 30 40 50 60
   Number of threads

atomic_add-bench: 100 ops/thread, [0,2] range

  16 ++-+--+-+--+--+--+---++
 +cmpxchg +-E--+   + +  +  +  +|
  14 ++master +-H--+  ++
 | |   |
  12 ++|  ++
 | E   |
  10 ++|  ++
 | |   |
   8  ++
 |E+|  |
 |  |  |
   6 ++ | ++
 |   | |
   4 ++  |++
 |  +E+---   +++  +++  +++   ---+E+--+E|
   2 +H+ +E+--E---+E+-+E+--+E+--+E+--+++
 + |++++   +    +  +  +|
   0 ++H-HH-+-H+-+--+--+--+---++
 0  10 2030 40 50 60
Number of threads

   atomic_add-bench: 100 ops/thread, [0,128] range

  70 ++-+--+-+--+--+--+---++
 +cmpxchg +-E--+   + +  +     +|
  60 ++master +-H--+ E--+E+---++
 |-+E+---   +++ +++  +E|
 |+++  +++   ++|
  50 ++   +++  ---+E+-++
 |-E---|
  40 ++---+++ ++
 |   +++---|
 |  -+E+   |
  30 ++  +++  ++
 |   +E+   |
  20 ++ +++-- ++
 |  +E+ 

[Qemu-devel] [PATCH v6 16/35] tcg: Emit barriers with parallel_cpus

2016-10-11 Thread Richard Henderson
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 tcg/tcg-op.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index cdd61d6..bb2bfee 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -150,17 +150,7 @@ void tcg_gen_op6(TCGContext *ctx, TCGOpcode opc, TCGArg 
a1, TCGArg a2,
 
 void tcg_gen_mb(TCGBar mb_type)
 {
-bool emit_barriers = true;
-
-#ifndef CONFIG_USER_ONLY
-/* TODO: When MTTCG is available for system mode, we will check
- * the following condition and enable emit_barriers
- * (qemu_tcg_mttcg_enabled() && smp_cpus > 1)
- */
-emit_barriers = false;
-#endif
-
-if (emit_barriers) {
+if (parallel_cpus) {
 tcg_gen_op1(_ctx, INDEX_op_mb, mb_type);
 }
 }
-- 
2.7.4




[Qemu-devel] [PATCH v6 22/35] target-i386: emulate LOCK'ed XADD using atomic helper

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

[rth: Move load of reg value to common location.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-17-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 17a37a3..049b1e4 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -5135,19 +5135,24 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 modrm = cpu_ldub_code(env, s->pc++);
 reg = ((modrm >> 3) & 7) | rex_r;
 mod = (modrm >> 6) & 3;
+gen_op_mov_v_reg(ot, cpu_T0, reg);
 if (mod == 3) {
 rm = (modrm & 7) | REX_B(s);
-gen_op_mov_v_reg(ot, cpu_T0, reg);
 gen_op_mov_v_reg(ot, cpu_T1, rm);
 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
 gen_op_mov_reg_v(ot, reg, cpu_T1);
 gen_op_mov_reg_v(ot, rm, cpu_T0);
 } else {
 gen_lea_modrm(env, s, modrm);
-gen_op_mov_v_reg(ot, cpu_T0, reg);
-gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
-tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+if (s->prefix & PREFIX_LOCK) {
+tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0,
+s->mem_index, ot | MO_LE);
+tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+} else {
+gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+}
 gen_op_mov_reg_v(ot, reg, cpu_T1);
 }
 gen_op_update2_cc();
-- 
2.7.4




[Qemu-devel] [PATCH v6 13/35] tcg: Add atomic helpers

2016-10-11 Thread Richard Henderson
Add all of cmpxchg, op_fetch, fetch_op, and xchg.
Handle both endian-ness, and sizes up to 8.
Handle expanding non-atomically, when emulating in serial.

Signed-off-by: Richard Henderson 
---
 Makefile.objs |   2 +-
 Makefile.target   |   1 +
 atomic_template.h | 173 ++
 cputlb.c  | 112 -
 include/qemu/atomic.h |  43 ---
 tcg-runtime.c |  49 ++--
 tcg/tcg-op.c  | 328 ++
 tcg/tcg-op.h  |  44 +++
 tcg/tcg-runtime.h |  75 
 tcg/tcg.h |  53 
 10 files changed, 848 insertions(+), 32 deletions(-)
 create mode 100644 atomic_template.h

diff --git a/Makefile.objs b/Makefile.objs
index 02fb8e7..99d1f6d 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -89,7 +89,7 @@ endif
 
 ###
 # Target-independent parts used in system and user emulation
-common-obj-y += tcg-runtime.o cpus-common.o
+common-obj-y += cpus-common.o
 common-obj-y += hw/
 common-obj-y += qom/
 common-obj-y += disas/
diff --git a/Makefile.target b/Makefile.target
index 9968871..91b6fbd 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -94,6 +94,7 @@ obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
 obj-y += fpu/softfloat.o
 obj-y += target-$(TARGET_BASE_ARCH)/
 obj-y += disas.o
+obj-y += tcg-runtime.o
 obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
 
diff --git a/atomic_template.h b/atomic_template.h
new file mode 100644
index 000..d2c8a08
--- /dev/null
+++ b/atomic_template.h
@@ -0,0 +1,173 @@
+/*
+ * Atomic helper templates
+ * Included from tcg-runtime.c and cputlb.c.
+ *
+ * Copyright (c) 2016 Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if DATA_SIZE == 8
+# define SUFFIX q
+# define DATA_TYPE  uint64_t
+# define BSWAP  bswap64
+#elif DATA_SIZE == 4
+# define SUFFIX l
+# define DATA_TYPE  uint32_t
+# define BSWAP  bswap32
+#elif DATA_SIZE == 2
+# define SUFFIX w
+# define DATA_TYPE  uint16_t
+# define BSWAP  bswap16
+#elif DATA_SIZE == 1
+# define SUFFIX b
+# define DATA_TYPE  uint8_t
+# define BSWAP
+#else
+# error unsupported data size
+#endif
+
+#if DATA_SIZE >= 4
+# define ABI_TYPE  DATA_TYPE
+#else
+# define ABI_TYPE  uint32_t
+#endif
+
+#if DATA_SIZE == 1
+# define END
+#elif defined(HOST_WORDS_BIGENDIAN)
+# define END  _be
+#else
+# define END  _le
+#endif
+
+ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
+  ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
+{
+DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+return atomic_cmpxchg__nocheck(haddr, cmpv, newv);
+}
+
+ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
+   ABI_TYPE val EXTRA_ARGS)
+{
+DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+return atomic_xchg__nocheck(haddr, val);
+}
+
+#define GEN_ATOMIC_HELPER(X)\
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,   \
+ ABI_TYPE val EXTRA_ARGS)   \
+{   \
+DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;   \
+return atomic_##X(haddr, val);  \
+}   \
+
+GEN_ATOMIC_HELPER(fetch_add)
+GEN_ATOMIC_HELPER(fetch_and)
+GEN_ATOMIC_HELPER(fetch_or)
+GEN_ATOMIC_HELPER(fetch_xor)
+GEN_ATOMIC_HELPER(add_fetch)
+GEN_ATOMIC_HELPER(and_fetch)
+GEN_ATOMIC_HELPER(or_fetch)
+GEN_ATOMIC_HELPER(xor_fetch)
+
+#undef GEN_ATOMIC_HELPER
+#undef END
+
+#if DATA_SIZE > 1
+
+#ifdef HOST_WORDS_BIGENDIAN
+# define END  _le
+#else
+# define END  _be
+#endif
+
+ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
+  ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
+{
+DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+return BSWAP(atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)));
+}
+
+ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
+   ABI_TYPE val EXTRA_ARGS)
+{
+DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+return 

[Qemu-devel] [PATCH v6 12/35] cputlb: Tidy some macros

2016-10-11 Thread Richard Henderson
TGT_LE and TGT_BE are not size dependent and do not need to be
redefined.  The others are no longer used at all.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 cputlb.c   |  8 
 softmmu_template.h | 22 --
 2 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index 1bee47d..82cf46e 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -585,6 +585,14 @@ void probe_write(CPUArchState *env, target_ulong addr, int 
mmu_idx,
 }
 }
 
+#ifdef TARGET_WORDS_BIGENDIAN
+# define TGT_BE(X)  (X)
+# define TGT_LE(X)  BSWAP(X)
+#else
+# define TGT_BE(X)  BSWAP(X)
+# define TGT_LE(X)  (X)
+#endif
+
 #define MMUSUFFIX _mmu
 
 #define DATA_SIZE 1
diff --git a/softmmu_template.h b/softmmu_template.h
index 035ffc8..4a2b665 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -78,14 +78,6 @@
 # define BSWAP(X)  (X)
 #endif
 
-#ifdef TARGET_WORDS_BIGENDIAN
-# define TGT_BE(X)  (X)
-# define TGT_LE(X)  BSWAP(X)
-#else
-# define TGT_BE(X)  BSWAP(X)
-# define TGT_LE(X)  (X)
-#endif
-
 #if DATA_SIZE == 1
 # define helper_le_ld_name  glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
 # define helper_be_ld_name  helper_le_ld_name
@@ -102,14 +94,6 @@
 # define helper_be_st_name  glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
 #endif
 
-#ifdef TARGET_WORDS_BIGENDIAN
-# define helper_te_ld_name  helper_be_ld_name
-# define helper_te_st_name  helper_be_st_name
-#else
-# define helper_te_ld_name  helper_le_ld_name
-# define helper_te_st_name  helper_le_st_name
-#endif
-
 #ifndef SOFTMMU_CODE_ACCESS
 static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
   size_t mmu_idx, size_t index,
@@ -441,15 +425,9 @@ void helper_be_st_name(CPUArchState *env, target_ulong 
addr, DATA_TYPE val,
 #undef USUFFIX
 #undef SSUFFIX
 #undef BSWAP
-#undef TGT_BE
-#undef TGT_LE
-#undef CPU_BE
-#undef CPU_LE
 #undef helper_le_ld_name
 #undef helper_be_ld_name
 #undef helper_le_lds_name
 #undef helper_be_lds_name
 #undef helper_le_st_name
 #undef helper_be_st_name
-#undef helper_te_ld_name
-#undef helper_te_st_name
-- 
2.7.4




[Qemu-devel] [PATCH v6 27/35] target-arm: Rearrange aa32 load and store functions

2016-10-11 Thread Richard Henderson
Stop specializing on TARGET_LONG_BITS == 32; unconditionally allocate
a temp and expand with tcg_gen_extu_i32_tl.  Split out gen_aa32_addr,
gen_aa32_frob64, gen_aa32_ld_i32 and gen_aa32_st_i32 as separate interfaces.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 target-arm/translate.c | 171 +++--
 1 file changed, 66 insertions(+), 105 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 8df24bf..f745c37 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -931,145 +931,106 @@ static inline void store_reg_from_load(DisasContext *s, 
int reg, TCGv_i32 var)
  * These functions work like tcg_gen_qemu_{ld,st}* except
  * that the address argument is TCGv_i32 rather than TCGv.
  */
-#if TARGET_LONG_BITS == 32
 
-#define DO_GEN_LD(SUFF, OPC, BE32_XOR)   \
-static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val,  \
- TCGv_i32 addr, int index)   \
-{\
-TCGMemOp opc = (OPC) | s->be_data;   \
-/* Not needed for user-mode BE32, where we use MO_BE instead.  */\
-if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) {   \
-TCGv addr_be = tcg_temp_new();   \
-tcg_gen_xori_i32(addr_be, addr, BE32_XOR);   \
-tcg_gen_qemu_ld_i32(val, addr_be, index, opc);   \
-tcg_temp_free(addr_be);  \
-return;  \
-}\
-tcg_gen_qemu_ld_i32(val, addr, index, opc);  \
-}
-
-#define DO_GEN_ST(SUFF, OPC, BE32_XOR)   \
-static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val,  \
- TCGv_i32 addr, int index)   \
-{\
-TCGMemOp opc = (OPC) | s->be_data;   \
-/* Not needed for user-mode BE32, where we use MO_BE instead.  */\
-if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) {   \
-TCGv addr_be = tcg_temp_new();   \
-tcg_gen_xori_i32(addr_be, addr, BE32_XOR);   \
-tcg_gen_qemu_st_i32(val, addr_be, index, opc);   \
-tcg_temp_free(addr_be);  \
-return;  \
-}\
-tcg_gen_qemu_st_i32(val, addr, index, opc);  \
-}
-
-static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
- TCGv_i32 addr, int index)
+static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, TCGMemOp op)
 {
-TCGMemOp opc = MO_Q | s->be_data;
-tcg_gen_qemu_ld_i64(val, addr, index, opc);
+TCGv addr = tcg_temp_new();
+tcg_gen_extu_i32_tl(addr, a32);
+
 /* Not needed for user-mode BE32, where we use MO_BE instead.  */
-if (!IS_USER_ONLY && s->sctlr_b) {
-tcg_gen_rotri_i64(val, val, 32);
+if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
+tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
 }
+return addr;
 }
 
-static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
- TCGv_i32 addr, int index)
+static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
+int index, TCGMemOp opc)
 {
-TCGMemOp opc = MO_Q | s->be_data;
-/* Not needed for user-mode BE32, where we use MO_BE instead.  */
-if (!IS_USER_ONLY && s->sctlr_b) {
-TCGv_i64 tmp = tcg_temp_new_i64();
-tcg_gen_rotri_i64(tmp, val, 32);
-tcg_gen_qemu_st_i64(tmp, addr, index, opc);
-tcg_temp_free_i64(tmp);
-return;
-}
-tcg_gen_qemu_st_i64(val, addr, index, opc);
+TCGv addr = gen_aa32_addr(s, a32, opc);
+tcg_gen_qemu_ld_i32(val, addr, index, opc);
+tcg_temp_free(addr);
 }
 
-#else
+static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
+int index, TCGMemOp opc)
+{
+TCGv addr = gen_aa32_addr(s, a32, opc);
+tcg_gen_qemu_st_i32(val, addr, index, opc);
+tcg_temp_free(addr);
+}
 
-#define DO_GEN_LD(SUFF, OPC, BE32_XOR)   \
+#define DO_GEN_LD(SUFF, OPC) \
 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val,  \
- TCGv_i32 

[Qemu-devel] [PATCH v6 20/35] target-i386: emulate LOCK'ed NOT using atomic helper

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

[rth: Avoid qemu_load that's redundant with the atomic op.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-15-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 26 --
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index a38d953..49455a3 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4675,10 +4675,15 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 rm = (modrm & 7) | REX_B(s);
 op = (modrm >> 3) & 7;
 if (mod != 3) {
-if (op == 0)
+if (op == 0) {
 s->rip_offset = insn_const_size(ot);
+}
 gen_lea_modrm(env, s, modrm);
-gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+/* For those below that handle locked memory, don't load here.  */
+if (!(s->prefix & PREFIX_LOCK)
+|| op != 2) {
+gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+}
 } else {
 gen_op_mov_v_reg(ot, cpu_T0, rm);
 }
@@ -4691,11 +4696,20 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 set_cc_op(s, CC_OP_LOGICB + ot);
 break;
 case 2: /* not */
-tcg_gen_not_tl(cpu_T0, cpu_T0);
-if (mod != 3) {
-gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+if (s->prefix & PREFIX_LOCK) {
+if (mod == 3) {
+goto illegal_op;
+}
+tcg_gen_movi_tl(cpu_T0, ~0);
+tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+s->mem_index, ot | MO_LE);
 } else {
-gen_op_mov_reg_v(ot, rm, cpu_T0);
+tcg_gen_not_tl(cpu_T0, cpu_T0);
+if (mod != 3) {
+gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+} else {
+gen_op_mov_reg_v(ot, rm, cpu_T0);
+}
 }
 break;
 case 3: /* neg */
-- 
2.7.4




[Qemu-devel] [PATCH v6 14/35] tcg: Add atomic128 helpers

2016-10-11 Thread Richard Henderson
Force the use of cmpxchg16b on x86_64.

Wikipedia suggests that only very old AMD64 (circa 2004) did not have
this instruction.  Further, it's required by Windows 8 so no new cpus
will ever omit it.

If we truly care about these, then we could check this at startup time
and then avoid executing paths that use it.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 atomic_template.h | 40 +++-
 configure | 29 -
 cputlb.c  |  5 +
 include/qemu/int128.h |  6 ++
 tcg-runtime.c | 18 ++
 tcg/tcg.h | 24 +++-
 6 files changed, 119 insertions(+), 3 deletions(-)

diff --git a/atomic_template.h b/atomic_template.h
index d2c8a08..4fdf722 100644
--- a/atomic_template.h
+++ b/atomic_template.h
@@ -18,7 +18,11 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#if DATA_SIZE == 8
+#if DATA_SIZE == 16
+# define SUFFIX o
+# define DATA_TYPE  Int128
+# define BSWAP  bswap128
+#elif DATA_SIZE == 8
 # define SUFFIX q
 # define DATA_TYPE  uint64_t
 # define BSWAP  bswap64
@@ -59,6 +63,21 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, 
target_ulong addr,
 return atomic_cmpxchg__nocheck(haddr, cmpv, newv);
 }
 
+#if DATA_SIZE >= 16
+ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
+{
+DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
+__atomic_load(haddr, , __ATOMIC_RELAXED);
+return val;
+}
+
+void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
+ ABI_TYPE val EXTRA_ARGS)
+{
+DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+__atomic_store(haddr, , __ATOMIC_RELAXED);
+}
+#else
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
 {
@@ -84,6 +103,8 @@ GEN_ATOMIC_HELPER(or_fetch)
 GEN_ATOMIC_HELPER(xor_fetch)
 
 #undef GEN_ATOMIC_HELPER
+#endif /* DATA_SIZE >= 16 */
+
 #undef END
 
 #if DATA_SIZE > 1
@@ -101,6 +122,22 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, 
target_ulong addr,
 return BSWAP(atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)));
 }
 
+#if DATA_SIZE >= 16
+ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
+{
+DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
+__atomic_load(haddr, , __ATOMIC_RELAXED);
+return BSWAP(val);
+}
+
+void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
+ ABI_TYPE val EXTRA_ARGS)
+{
+DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+val = BSWAP(val);
+__atomic_store(haddr, , __ATOMIC_RELAXED);
+}
+#else
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
 {
@@ -162,6 +199,7 @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, 
target_ulong addr,
 ldo = ldn;
 }
 }
+#endif /* DATA_SIZE >= 16 */
 
 #undef END
 #endif /* DATA_SIZE > 1 */
diff --git a/configure b/configure
index 5751d8e..5b38357 100755
--- a/configure
+++ b/configure
@@ -1216,7 +1216,10 @@ case "$cpu" in
cc_i386='$(CC) -m32'
;;
 x86_64)
-   CPU_CFLAGS="-m64"
+   # ??? Only extremely old AMD cpus do not have cmpxchg16b.
+   # If we truly care, we should simply detect this case at
+   # runtime and generate the fallback to serial emulation.
+   CPU_CFLAGS="-m64 -mcx16"
LDFLAGS="-m64 $LDFLAGS"
cc_i386='$(CC) -m32'
;;
@@ -4478,6 +4481,26 @@ if compile_prog "" "" ; then
 int128=yes
 fi
 
+#
+# See if 128-bit atomic operations are supported.
+
+atomic128=no
+if test "$int128" = "yes"; then
+  cat > $TMPC << EOF
+int main(void)
+{
+  unsigned __int128 x = 0, y = 0;
+  y = __atomic_load_16(, 0);
+  __atomic_store_16(, y, 0);
+  __atomic_compare_exchange_16(, , x, 0, 0, 0);
+  return 0;
+}
+EOF
+  if compile_prog "" "" ; then
+atomic128=yes
+  fi
+fi
+
 
 # check if getauxval is available.
 
@@ -5431,6 +5454,10 @@ if test "$int128" = "yes" ; then
   echo "CONFIG_INT128=y" >> $config_host_mak
 fi
 
+if test "$atomic128" = "yes" ; then
+  echo "CONFIG_ATOMIC128=y" >> $config_host_mak
+fi
+
 if test "$getauxval" = "yes" ; then
   echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
 fi
diff --git a/cputlb.c b/cputlb.c
index 4f2c500..845b2a7 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -690,6 +690,11 @@ static void *atomic_mmu_lookup(CPUArchState *env, 
target_ulong addr,
 #define DATA_SIZE 8
 #include "atomic_template.h"
 
+#ifdef CONFIG_ATOMIC128
+#define DATA_SIZE 16
+#include "atomic_template.h"
+#endif
+
 /* Second set of helpers are directly callable from TCG as helpers.  */
 
 #undef EXTRA_ARGS
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index d4c6e44..5c9890d 100644
--- a/include/qemu/int128.h
+++ 

[Qemu-devel] [PATCH v6 11/35] cputlb: Move most of iotlb code out of line

2016-10-11 Thread Richard Henderson
Saves 2k code size off of a cold path.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 cputlb.c   | 37 +
 softmmu_template.h | 52 ++--
 2 files changed, 47 insertions(+), 42 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index 0c9b77b..1bee47d 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -498,6 +498,43 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, 
target_ulong addr)
 return qemu_ram_addr_from_host_nofail(p);
 }
 
+static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
+ target_ulong addr, uintptr_t retaddr, int size)
+{
+CPUState *cpu = ENV_GET_CPU(env);
+hwaddr physaddr = iotlbentry->addr;
+MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
+uint64_t val;
+
+physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
+cpu->mem_io_pc = retaddr;
+if (mr != _mem_rom && mr != _mem_notdirty && !cpu->can_do_io) {
+cpu_io_recompile(cpu, retaddr);
+}
+
+cpu->mem_io_vaddr = addr;
+memory_region_dispatch_read(mr, physaddr, , size, iotlbentry->attrs);
+return val;
+}
+
+static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
+  uint64_t val, target_ulong addr,
+  uintptr_t retaddr, int size)
+{
+CPUState *cpu = ENV_GET_CPU(env);
+hwaddr physaddr = iotlbentry->addr;
+MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
+
+physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
+if (mr != _mem_rom && mr != _mem_notdirty && !cpu->can_do_io) {
+cpu_io_recompile(cpu, retaddr);
+}
+
+cpu->mem_io_vaddr = addr;
+cpu->mem_io_pc = retaddr;
+memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs);
+}
+
 /* Return true if ADDR is present in the victim tlb, and has been copied
back to the main tlb.  */
 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
diff --git a/softmmu_template.h b/softmmu_template.h
index b9532a4..035ffc8 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -112,25 +112,12 @@
 
 #ifndef SOFTMMU_CODE_ACCESS
 static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
-  CPUIOTLBEntry *iotlbentry,
+  size_t mmu_idx, size_t index,
   target_ulong addr,
   uintptr_t retaddr)
 {
-uint64_t val;
-CPUState *cpu = ENV_GET_CPU(env);
-hwaddr physaddr = iotlbentry->addr;
-MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
-
-physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
-cpu->mem_io_pc = retaddr;
-if (mr != _mem_rom && mr != _mem_notdirty && !cpu->can_do_io) {
-cpu_io_recompile(cpu, retaddr);
-}
-
-cpu->mem_io_vaddr = addr;
-memory_region_dispatch_read(mr, physaddr, , DATA_SIZE,
-iotlbentry->attrs);
-return val;
+CPUIOTLBEntry *iotlbentry = >iotlb[mmu_idx][index];
+return io_readx(env, iotlbentry, addr, retaddr, DATA_SIZE);
 }
 #endif
 
@@ -161,15 +148,13 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, 
target_ulong addr,
 
 /* Handle an IO access.  */
 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-CPUIOTLBEntry *iotlbentry;
 if ((addr & (DATA_SIZE - 1)) != 0) {
 goto do_unaligned_access;
 }
-iotlbentry = >iotlb[mmu_idx][index];
 
 /* ??? Note that the io helpers always read data in the target
byte ordering.  We should push the LE/BE request down into io.  */
-res = glue(io_read, SUFFIX)(env, iotlbentry, addr, retaddr);
+res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr);
 res = TGT_LE(res);
 return res;
 }
@@ -230,15 +215,13 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, 
target_ulong addr,
 
 /* Handle an IO access.  */
 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-CPUIOTLBEntry *iotlbentry;
 if ((addr & (DATA_SIZE - 1)) != 0) {
 goto do_unaligned_access;
 }
-iotlbentry = >iotlb[mmu_idx][index];
 
 /* ??? Note that the io helpers always read data in the target
byte ordering.  We should push the LE/BE request down into io.  */
-res = glue(io_read, SUFFIX)(env, iotlbentry, addr, retaddr);
+res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr);
 res = TGT_BE(res);
 return res;
 }
@@ -289,24 +272,13 @@ WORD_TYPE helper_be_lds_name(CPUArchState *env, 
target_ulong addr,
 #endif
 
 static inline void glue(io_write, SUFFIX)(CPUArchState *env,
-  CPUIOTLBEntry *iotlbentry,
+  size_t mmu_idx, size_t 

[Qemu-devel] [PATCH v6 18/35] target-i386: emulate LOCK'ed OP instructions using atomic helpers

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

[rth: Eliminate some unnecessary temporaries.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-13-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 76 +
 1 file changed, 58 insertions(+), 18 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 5d9790a..b5c7791 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1258,55 +1258,95 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp 
ot, int d)
 {
 if (d != OR_TMP0) {
 gen_op_mov_v_reg(ot, cpu_T0, d);
-} else {
+} else if (!(s1->prefix & PREFIX_LOCK)) {
 gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
 }
 switch(op) {
 case OP_ADCL:
 gen_compute_eflags_c(s1, cpu_tmp4);
-tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
+tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+s1->mem_index, ot | MO_LE);
+} else {
+tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update3_cc(cpu_tmp4);
 set_cc_op(s1, CC_OP_ADCB + ot);
 break;
 case OP_SBBL:
 gen_compute_eflags_c(s1, cpu_tmp4);
-tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
-tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
+tcg_gen_neg_tl(cpu_T0, cpu_T0);
+tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+s1->mem_index, ot | MO_LE);
+} else {
+tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
+tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update3_cc(cpu_tmp4);
 set_cc_op(s1, CC_OP_SBBB + ot);
 break;
 case OP_ADDL:
-tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+s1->mem_index, ot | MO_LE);
+} else {
+tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update2_cc();
 set_cc_op(s1, CC_OP_ADDB + ot);
 break;
 case OP_SUBL:
-tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
-tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_neg_tl(cpu_T0, cpu_T1);
+tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0,
+s1->mem_index, ot | MO_LE);
+tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1);
+} else {
+tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
+tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update2_cc();
 set_cc_op(s1, CC_OP_SUBB + ot);
 break;
 default:
 case OP_ANDL:
-tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+s1->mem_index, ot | MO_LE);
+} else {
+tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update1_cc();
 set_cc_op(s1, CC_OP_LOGICB + ot);
 break;
 case OP_ORL:
-tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+   s1->mem_index, ot | MO_LE);
+} else {
+tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update1_cc();
 set_cc_op(s1, CC_OP_LOGICB + ot);
 break;
 case OP_XORL:
-tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+s1->mem_index, ot | MO_LE);
+} else {
+tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update1_cc();
 set_cc_op(s1, CC_OP_LOGICB + ot);
 break;

[Qemu-devel] [PATCH v6 19/35] target-i386: emulate LOCK'ed INC using atomic helper

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

[rth: Merge gen_inc_locked back into gen_inc to share cc update.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-14-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index b5c7791..a38d953 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1362,21 +1362,23 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp 
ot, int d)
 /* if d == OR_TMP0, it means memory operand (address in A0) */
 static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
 {
-if (d != OR_TMP0) {
-gen_op_mov_v_reg(ot, cpu_T0, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
+tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+s1->mem_index, ot | MO_LE);
 } else {
-gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
+if (d != OR_TMP0) {
+gen_op_mov_v_reg(ot, cpu_T0, d);
+} else {
+gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
+}
+tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
+gen_op_st_rm_T0_A0(s1, ot, d);
 }
+
 gen_compute_eflags_c(s1, cpu_cc_src);
-if (c > 0) {
-tcg_gen_addi_tl(cpu_T0, cpu_T0, 1);
-set_cc_op(s1, CC_OP_INCB + ot);
-} else {
-tcg_gen_addi_tl(cpu_T0, cpu_T0, -1);
-set_cc_op(s1, CC_OP_DECB + ot);
-}
-gen_op_st_rm_T0_A0(s1, ot, d);
 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
 }
 
 static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
-- 
2.7.4




[Qemu-devel] [PATCH v6 17/35] target-i386: emulate LOCK'ed cmpxchg using cmpxchg helpers

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

The diff here is uglier than necessary. All this does is to turn

FOO

into:

if (s->prefix & PREFIX_LOCK) {
  BAR
} else {
  FOO
}

where FOO is the original implementation of an unlocked cmpxchg.

[rth: Adjust unlocked cmpxchg to use movcond instead of branches.
Adjust helpers to use atomic helpers.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-6-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/helper.h |   2 +
 target-i386/mem_helper.c | 134 +++
 target-i386/translate.c  |  99 ++
 3 files changed, 169 insertions(+), 66 deletions(-)

diff --git a/target-i386/helper.h b/target-i386/helper.h
index 1320edc..729d4b6 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -74,8 +74,10 @@ DEF_HELPER_3(boundw, void, env, tl, int)
 DEF_HELPER_3(boundl, void, env, tl, int)
 DEF_HELPER_1(rsm, void, env)
 DEF_HELPER_2(into, void, env, int)
+DEF_HELPER_2(cmpxchg8b_unlocked, void, env, tl)
 DEF_HELPER_2(cmpxchg8b, void, env, tl)
 #ifdef TARGET_X86_64
+DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
 DEF_HELPER_2(cmpxchg16b, void, env, tl)
 #endif
 DEF_HELPER_1(single_step, void, env)
diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c
index 5bc0594..c4b5c5b 100644
--- a/target-i386/mem_helper.c
+++ b/target-i386/mem_helper.c
@@ -22,6 +22,8 @@
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
+#include "qemu/int128.h"
+#include "tcg.h"
 
 /* broken thread support */
 
@@ -56,53 +58,143 @@ void helper_lock_init(void)
 }
 #endif
 
+void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
+{
+uintptr_t ra = GETPC();
+uint64_t oldv, cmpv, newv;
+int eflags;
+
+eflags = cpu_cc_compute_all(env, CC_OP);
+
+cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
+newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
+
+oldv = cpu_ldq_data_ra(env, a0, ra);
+newv = (cmpv == oldv ? newv : oldv);
+/* always do the store */
+cpu_stq_data_ra(env, a0, newv, ra);
+
+if (oldv == cmpv) {
+eflags |= CC_Z;
+} else {
+env->regs[R_EAX] = (uint32_t)oldv;
+env->regs[R_EDX] = (uint32_t)(oldv >> 32);
+eflags &= ~CC_Z;
+}
+CC_SRC = eflags;
+}
+
 void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
 {
-uint64_t d;
+#ifdef CONFIG_ATOMIC64
+uint64_t oldv, cmpv, newv;
 int eflags;
 
 eflags = cpu_cc_compute_all(env, CC_OP);
-d = cpu_ldq_data_ra(env, a0, GETPC());
-if (d == (((uint64_t)env->regs[R_EDX] << 32) | 
(uint32_t)env->regs[R_EAX])) {
-cpu_stq_data_ra(env, a0, ((uint64_t)env->regs[R_ECX] << 32)
-  | (uint32_t)env->regs[R_EBX], GETPC());
+
+cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
+newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
+
+#ifdef CONFIG_USER_ONLY
+{
+uint64_t *haddr = g2h(a0);
+cmpv = cpu_to_le64(cmpv);
+newv = cpu_to_le64(newv);
+oldv = atomic_cmpxchg__nocheck(haddr, cmpv, newv);
+oldv = le64_to_cpu(oldv);
+}
+#else
+{
+uintptr_t ra = GETPC();
+int mem_idx = cpu_mmu_index(env, false);
+TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx);
+oldv = helper_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra);
+}
+#endif
+
+if (oldv == cmpv) {
 eflags |= CC_Z;
 } else {
-/* always do the store */
-cpu_stq_data_ra(env, a0, d, GETPC());
-env->regs[R_EDX] = (uint32_t)(d >> 32);
-env->regs[R_EAX] = (uint32_t)d;
+env->regs[R_EAX] = (uint32_t)oldv;
+env->regs[R_EDX] = (uint32_t)(oldv >> 32);
 eflags &= ~CC_Z;
 }
 CC_SRC = eflags;
+#else
+cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
+#endif /* CONFIG_ATOMIC64 */
 }
 
 #ifdef TARGET_X86_64
-void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
+void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0)
 {
-uint64_t d0, d1;
+uintptr_t ra = GETPC();
+Int128 oldv, cmpv, newv;
+uint64_t o0, o1;
 int eflags;
+bool success;
 
 if ((a0 & 0xf) != 0) {
 raise_exception_ra(env, EXCP0D_GPF, GETPC());
 }
 eflags = cpu_cc_compute_all(env, CC_OP);
-d0 = cpu_ldq_data_ra(env, a0, GETPC());
-d1 = cpu_ldq_data_ra(env, a0 + 8, GETPC());
-if (d0 == env->regs[R_EAX] && d1 == env->regs[R_EDX]) {
-cpu_stq_data_ra(env, a0, env->regs[R_EBX], GETPC());
-cpu_stq_data_ra(env, a0 + 8, env->regs[R_ECX], GETPC());
+
+cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
+newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
+
+o0 = cpu_ldq_data_ra(env, a0 + 0, ra);
+o1 = cpu_ldq_data_ra(env, a0 + 8, ra);
+
+oldv = int128_make128(o0, o1);
+success = 

[Qemu-devel] [PATCH v6 15/35] tcg: Add CONFIG_ATOMIC64

2016-10-11 Thread Richard Henderson
Allow qemu to build on 32-bit hosts without 64-bit atomic ops.

Even if we only allow 32-bit hosts to multi-thread emulate 32-bit
guests, we still need some way to handle the 32-bit guest using a
64-bit atomic operation.  Do so by dropping back to single-step.

Signed-off-by: Richard Henderson 
---
 configure | 33 +
 cputlb.c  |  4 
 tcg-runtime.c |  7 +++
 tcg/tcg-op.c  | 22 ++
 tcg/tcg-runtime.h | 46 --
 tcg/tcg.h | 15 ---
 6 files changed, 114 insertions(+), 13 deletions(-)

diff --git a/configure b/configure
index 5b38357..0616043 100755
--- a/configure
+++ b/configure
@@ -4501,6 +4501,35 @@ EOF
   fi
 fi
 
+#
+# See if 64-bit atomic operations are supported.
+# Note that without __atomic builtins, we can only
+# assume atomic loads/stores max at pointer size.
+
+cat > $TMPC << EOF
+#include 
+int main(void)
+{
+  uint64_t x = 0, y = 0;
+#ifdef __ATOMIC_RELAXED
+  y = __atomic_load_8(, 0);
+  __atomic_store_8(, y, 0);
+  __atomic_compare_exchange_8(, , x, 0, 0, 0);
+  __atomic_exchange_8(, y, 0);
+  __atomic_fetch_add_8(, y, 0);
+#else
+  typedef char is_host64[sizeof(void *) >= sizeof(uint64_t) ? 1 : -1];
+  __sync_lock_test_and_set(, y);
+  __sync_val_compare_and_swap(, y, 0);
+  __sync_fetch_and_add(, y);
+#endif
+  return 0;
+}
+EOF
+if compile_prog "" "" ; then
+  atomic64=yes
+fi
+
 
 # check if getauxval is available.
 
@@ -5458,6 +5487,10 @@ if test "$atomic128" = "yes" ; then
   echo "CONFIG_ATOMIC128=y" >> $config_host_mak
 fi
 
+if test "$atomic64" = "yes" ; then
+  echo "CONFIG_ATOMIC64=y" >> $config_host_mak
+fi
+
 if test "$getauxval" = "yes" ; then
   echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
 fi
diff --git a/cputlb.c b/cputlb.c
index 845b2a7..cc4da4d 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -687,8 +687,10 @@ static void *atomic_mmu_lookup(CPUArchState *env, 
target_ulong addr,
 #define DATA_SIZE 4
 #include "atomic_template.h"
 
+#ifdef CONFIG_ATOMIC64
 #define DATA_SIZE 8
 #include "atomic_template.h"
+#endif
 
 #ifdef CONFIG_ATOMIC128
 #define DATA_SIZE 16
@@ -713,8 +715,10 @@ static void *atomic_mmu_lookup(CPUArchState *env, 
target_ulong addr,
 #define DATA_SIZE 4
 #include "atomic_template.h"
 
+#ifdef CONFIG_ATOMIC64
 #define DATA_SIZE 8
 #include "atomic_template.h"
+#endif
 
 /* Code access functions.  */
 
diff --git a/tcg-runtime.c b/tcg-runtime.c
index e952153..9327b6f 100644
--- a/tcg-runtime.c
+++ b/tcg-runtime.c
@@ -101,6 +101,11 @@ int64_t HELPER(mulsh_i64)(int64_t arg1, int64_t arg2)
 return h;
 }
 
+void HELPER(exit_atomic)(CPUArchState *env)
+{
+cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
+}
+
 #ifndef CONFIG_SOFTMMU
 /* The softmmu versions of these helpers are in cputlb.c.  */
 
@@ -130,8 +135,10 @@ static void *atomic_mmu_lookup(CPUArchState *env, 
target_ulong addr,
 #define DATA_SIZE 4
 #include "atomic_template.h"
 
+#ifdef CONFIG_ATOMIC64
 #define DATA_SIZE 8
 #include "atomic_template.h"
+#endif
 
 /* The following is only callable from other helpers, and matches up
with the softmmu version.  */
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 65e3663..cdd61d6 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2040,14 +2040,20 @@ typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, 
TCGv, TCGv_i32);
 typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64);
 #endif
 
+#ifdef CONFIG_ATOMIC64
+# define WITH_ATOMIC64(X) X,
+#else
+# define WITH_ATOMIC64(X)
+#endif
+
 static void * const table_cmpxchg[16] = {
 [MO_8] = gen_helper_atomic_cmpxchgb,
 [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
 [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
 [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
 [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
-[MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le,
-[MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be,
+WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
+WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
 };
 
 void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
@@ -2117,6 +2123,7 @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, 
TCGv_i64 cmpv,
 }
 tcg_temp_free_i64(t1);
 } else if ((memop & MO_SIZE) == MO_64) {
+#ifdef CONFIG_ATOMIC64
 gen_atomic_cx_i64 gen;
 
 gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
@@ -2131,6 +2138,9 @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, 
TCGv_i64 cmpv,
 #else
 gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv);
 #endif
+#else
+gen_helper_exit_atomic(tcg_ctx.tcg_env);
+#endif /* CONFIG_ATOMIC64 */
 } else {
 TCGv_i32 c32 = tcg_temp_new_i32();
 TCGv_i32 n32 = tcg_temp_new_i32();
@@ -2218,6 +2228,7 @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGv 

[Qemu-devel] [PATCH v6 10/35] cputlb: Remove includes from softmmu_template.h

2016-10-11 Thread Richard Henderson
We already include exec/address-spaces.h and exec/memory.h in
cputlb.c; the include of qemu/timer.h appears to be a fossil.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 softmmu_template.h | 4 
 1 file changed, 4 deletions(-)

diff --git a/softmmu_template.h b/softmmu_template.h
index 538cff5..b9532a4 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -21,10 +21,6 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
-#include "qemu/timer.h"
-#include "exec/address-spaces.h"
-#include "exec/memory.h"
-
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define LSUFFIX q
-- 
2.7.4




[Qemu-devel] [PATCH v6 08/35] cputlb: Replace SHIFT with DATA_SIZE

2016-10-11 Thread Richard Henderson
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 cputlb.c   | 16 
 softmmu_template.h |  7 ++-
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index 3c99c34..5575b73 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -529,16 +529,16 @@ static bool victim_tlb_hit(CPUArchState *env, size_t 
mmu_idx, size_t index,
 
 #define MMUSUFFIX _mmu
 
-#define SHIFT 0
+#define DATA_SIZE 1
 #include "softmmu_template.h"
 
-#define SHIFT 1
+#define DATA_SIZE 2
 #include "softmmu_template.h"
 
-#define SHIFT 2
+#define DATA_SIZE 4
 #include "softmmu_template.h"
 
-#define SHIFT 3
+#define DATA_SIZE 8
 #include "softmmu_template.h"
 #undef MMUSUFFIX
 
@@ -547,14 +547,14 @@ static bool victim_tlb_hit(CPUArchState *env, size_t 
mmu_idx, size_t index,
 #define GETPC() ((uintptr_t)0)
 #define SOFTMMU_CODE_ACCESS
 
-#define SHIFT 0
+#define DATA_SIZE 1
 #include "softmmu_template.h"
 
-#define SHIFT 1
+#define DATA_SIZE 2
 #include "softmmu_template.h"
 
-#define SHIFT 2
+#define DATA_SIZE 4
 #include "softmmu_template.h"
 
-#define SHIFT 3
+#define DATA_SIZE 8
 #include "softmmu_template.h"
diff --git a/softmmu_template.h b/softmmu_template.h
index 27ed269..f9c51fe 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -25,8 +25,6 @@
 #include "exec/address-spaces.h"
 #include "exec/memory.h"
 
-#define DATA_SIZE (1 << SHIFT)
-
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define LSUFFIX q
@@ -134,7 +132,7 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState 
*env,
 }
 
 cpu->mem_io_vaddr = addr;
-memory_region_dispatch_read(mr, physaddr, , 1 << SHIFT,
+memory_region_dispatch_read(mr, physaddr, , DATA_SIZE,
 iotlbentry->attrs);
 return val;
 }
@@ -311,7 +309,7 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env,
 
 cpu->mem_io_vaddr = addr;
 cpu->mem_io_pc = retaddr;
-memory_region_dispatch_write(mr, physaddr, val, 1 << SHIFT,
+memory_region_dispatch_write(mr, physaddr, val, DATA_SIZE,
  iotlbentry->attrs);
 }
 
@@ -492,7 +490,6 @@ void probe_write(CPUArchState *env, target_ulong addr, int 
mmu_idx,
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
 
 #undef READ_ACCESS_TYPE
-#undef SHIFT
 #undef DATA_TYPE
 #undef SUFFIX
 #undef LSUFFIX
-- 
2.7.4




[Qemu-devel] [PATCH v6 09/35] cputlb: Move probe_write out of softmmu_template.h

2016-10-11 Thread Richard Henderson
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 cputlb.c   | 21 +
 softmmu_template.h | 23 ---
 2 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index 5575b73..0c9b77b 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -527,6 +527,27 @@ static bool victim_tlb_hit(CPUArchState *env, size_t 
mmu_idx, size_t index,
   victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
  (ADDR) & TARGET_PAGE_MASK)
 
+/* Probe for whether the specified guest write access is permitted.
+ * If it is not permitted then an exception will be taken in the same
+ * way as if this were a real write access (and we will not return).
+ * Otherwise the function will return, and there will be a valid
+ * entry in the TLB for this access.
+ */
+void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
+ uintptr_t retaddr)
+{
+int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+
+if ((addr & TARGET_PAGE_MASK)
+!= (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+/* TLB entry is for a different page */
+if (!VICTIM_TLB_HIT(addr_write, addr)) {
+tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
+}
+}
+}
+
 #define MMUSUFFIX _mmu
 
 #define DATA_SIZE 1
diff --git a/softmmu_template.h b/softmmu_template.h
index f9c51fe..538cff5 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -464,29 +464,6 @@ void helper_be_st_name(CPUArchState *env, target_ulong 
addr, DATA_TYPE val,
 glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
 }
 #endif /* DATA_SIZE > 1 */
-
-#if DATA_SIZE == 1
-/* Probe for whether the specified guest write access is permitted.
- * If it is not permitted then an exception will be taken in the same
- * way as if this were a real write access (and we will not return).
- * Otherwise the function will return, and there will be a valid
- * entry in the TLB for this access.
- */
-void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
- uintptr_t retaddr)
-{
-int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
-
-if ((addr & TARGET_PAGE_MASK)
-!= (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-/* TLB entry is for a different page */
-if (!VICTIM_TLB_HIT(addr_write, addr)) {
-tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
-}
-}
-}
-#endif
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
 
 #undef READ_ACCESS_TYPE
-- 
2.7.4




[Qemu-devel] [PATCH v6 01/35] atomics: add atomic_xor

2016-10-11 Thread Richard Henderson
From: "Emilio G. Cota" 

This paves the way for upcoming work.

Reviewed-by: Alex Bennée 
Signed-off-by: Emilio G. Cota 
Signed-off-by: Richard Henderson 
Message-Id: <1467054136-10430-8-git-send-email-c...@braap.org>
---
 include/qemu/atomic.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 0cce246..0124289 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -193,6 +193,7 @@
 #define atomic_fetch_sub(ptr, n) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST)
 #define atomic_fetch_and(ptr, n) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST)
 #define atomic_fetch_or(ptr, n)  __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)
+#define atomic_fetch_xor(ptr, n) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST)
 
 /* And even shorter names that return void.  */
 #define atomic_inc(ptr)    ((void) __atomic_fetch_add(ptr, 1, 
__ATOMIC_SEQ_CST))
@@ -201,6 +202,7 @@
 #define atomic_sub(ptr, n) ((void) __atomic_fetch_sub(ptr, n, 
__ATOMIC_SEQ_CST))
 #define atomic_and(ptr, n) ((void) __atomic_fetch_and(ptr, n, 
__ATOMIC_SEQ_CST))
 #define atomic_or(ptr, n)  ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST))
+#define atomic_xor(ptr, n) ((void) __atomic_fetch_xor(ptr, n, 
__ATOMIC_SEQ_CST))
 
 #else /* __ATOMIC_RELAXED */
 
@@ -387,6 +389,7 @@
 #define atomic_fetch_sub   __sync_fetch_and_sub
 #define atomic_fetch_and   __sync_fetch_and_and
 #define atomic_fetch_or    __sync_fetch_and_or
+#define atomic_fetch_xor   __sync_fetch_and_xor
 #define atomic_cmpxchg __sync_val_compare_and_swap
 
 /* And even shorter names that return void.  */
@@ -396,6 +399,7 @@
 #define atomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n))
 #define atomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n))
 #define atomic_or(ptr, n)  ((void) __sync_fetch_and_or(ptr, n))
+#define atomic_xor(ptr, n) ((void) __sync_fetch_and_xor(ptr, n))
 
 #endif /* __ATOMIC_RELAXED */
 #endif /* QEMU_ATOMIC_H */
-- 
2.7.4




[Qemu-devel] [PATCH v6 07/35] linux-user: enable parallel code generation on clone

2016-10-11 Thread Richard Henderson
From: Alex Bennée 

The variable parallel_cpus controls the generation of thread aware
atomic code.  We only need to set it once we clone our first thread.
At this point any existing translations need to be thrown away.

Signed-off-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 linux-user/syscall.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 0815f30..9d528bd 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -6144,6 +6144,14 @@ static int do_fork(CPUArchState *env, unsigned int 
flags, abi_ulong newsp,
 sigfillset(&sigmask);
 sigprocmask(SIG_BLOCK, &sigmask, &info.sigmask);
 
+/* If this is our first additional thread, we need to ensure we
+ * generate code for parallel execution and flush old translations.
+ */
+if (!parallel_cpus) {
+parallel_cpus = true;
+tb_flush(cpu);
+}
+
 ret = pthread_create(&info.thread, &attr, clone_func, &info);
 /* TODO: Free new CPU state if thread creation failed.  */
 
-- 
2.7.4




[Qemu-devel] [PATCH v6 05/35] int128: Add int128_make128

2016-10-11 Thread Richard Henderson
Allows Int128 to be used more generally, rather than having to
begin with 64-bit inputs and accumulate.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 include/qemu/int128.h | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index 64a7aca..d4c6e44 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -10,6 +10,11 @@ static inline Int128 int128_make64(uint64_t a)
 return a;
 }
 
+static inline Int128 int128_make128(uint64_t lo, uint64_t hi)
+{
+return (__uint128_t)hi << 64 | lo;
+}
+
 static inline uint64_t int128_get64(Int128 a)
 {
 uint64_t r = a;
@@ -146,6 +151,11 @@ static inline Int128 int128_make64(uint64_t a)
 return (Int128) { a, 0 };
 }
 
+static inline Int128 int128_make128(uint64_t lo, uint64_t hi)
+{
+return (Int128) { lo, hi };
+}
+
 static inline uint64_t int128_get64(Int128 a)
 {
 assert(!a.hi);
@@ -195,9 +205,9 @@ static inline Int128 int128_rshift(Int128 a, int n)
 }
 h = a.hi >> (n & 63);
 if (n >= 64) {
-return (Int128) { h, h >> 63 };
+return int128_make128(h, h >> 63);
 } else {
-return (Int128) { (a.lo >> n) | ((uint64_t)a.hi << (64 - n)), h };
+return int128_make128((a.lo >> n) | ((uint64_t)a.hi << (64 - n)), h);
 }
 }
 
@@ -211,18 +221,18 @@ static inline Int128 int128_add(Int128 a, Int128 b)
  *
  * So the carry is lo < a.lo.
  */
-return (Int128) { lo, (uint64_t)a.hi + b.hi + (lo < a.lo) };
+return int128_make128(lo, (uint64_t)a.hi + b.hi + (lo < a.lo));
 }
 
 static inline Int128 int128_neg(Int128 a)
 {
 uint64_t lo = -a.lo;
-return (Int128) { lo, ~(uint64_t)a.hi + !lo };
+return int128_make128(lo, ~(uint64_t)a.hi + !lo);
 }
 
 static inline Int128 int128_sub(Int128 a, Int128 b)
 {
-return (Int128){ a.lo - b.lo, (uint64_t)a.hi - b.hi - (a.lo < b.lo) };
+return int128_make128(a.lo - b.lo, (uint64_t)a.hi - b.hi - (a.lo < b.lo));
 }
 
 static inline bool int128_nonneg(Int128 a)
-- 
2.7.4




[Qemu-devel] [PATCH v6 03/35] exec: Avoid direct references to Int128 parts

2016-10-11 Thread Richard Henderson
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 exec.c                |  4 ++--
 include/qemu/int128.h | 10 ++++++++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/exec.c b/exec.c
index c8389f9..cc2997e 100644
--- a/exec.c
+++ b/exec.c
@@ -320,9 +320,9 @@ static inline bool section_covers_addr(const 
MemoryRegionSection *section,
 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
  * the section must cover the entire address space.
  */
-return section->size.hi ||
+return int128_gethi(section->size) ||
range_covers_byte(section->offset_within_address_space,
- section->size.lo, addr);
+ int128_getlo(section->size), addr);
 }
 
 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index c598881..52aaf99 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -20,6 +20,16 @@ static inline uint64_t int128_get64(Int128 a)
 return a.lo;
 }
 
+static inline uint64_t int128_getlo(Int128 a)
+{
+return a.lo;
+}
+
+static inline int64_t int128_gethi(Int128 a)
+{
+return a.hi;
+}
+
 static inline Int128 int128_zero(void)
 {
 return int128_make64(0);
-- 
2.7.4




[Qemu-devel] [PATCH v6 04/35] int128: Use __int128 if available

2016-10-11 Thread Richard Henderson
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 include/qemu/int128.h | 135 +-
 tests/test-int128.c   |  22 
 2 files changed, 145 insertions(+), 12 deletions(-)

diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index 52aaf99..64a7aca 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -1,6 +1,138 @@
 #ifndef INT128_H
 #define INT128_H
 
+#ifdef CONFIG_INT128
+
+typedef __int128_t Int128;
+
+static inline Int128 int128_make64(uint64_t a)
+{
+return a;
+}
+
+static inline uint64_t int128_get64(Int128 a)
+{
+uint64_t r = a;
+assert(r == a);
+return r;
+}
+
+static inline uint64_t int128_getlo(Int128 a)
+{
+return a;
+}
+
+static inline int64_t int128_gethi(Int128 a)
+{
+return a >> 64;
+}
+
+static inline Int128 int128_zero(void)
+{
+return 0;
+}
+
+static inline Int128 int128_one(void)
+{
+return 1;
+}
+
+static inline Int128 int128_2_64(void)
+{
+return (Int128)1 << 64;
+}
+
+static inline Int128 int128_exts64(int64_t a)
+{
+return a;
+}
+
+static inline Int128 int128_and(Int128 a, Int128 b)
+{
+return a & b;
+}
+
+static inline Int128 int128_rshift(Int128 a, int n)
+{
+return a >> n;
+}
+
+static inline Int128 int128_add(Int128 a, Int128 b)
+{
+return a + b;
+}
+
+static inline Int128 int128_neg(Int128 a)
+{
+return -a;
+}
+
+static inline Int128 int128_sub(Int128 a, Int128 b)
+{
+return a - b;
+}
+
+static inline bool int128_nonneg(Int128 a)
+{
+return a >= 0;
+}
+
+static inline bool int128_eq(Int128 a, Int128 b)
+{
+return a == b;
+}
+
+static inline bool int128_ne(Int128 a, Int128 b)
+{
+return a != b;
+}
+
+static inline bool int128_ge(Int128 a, Int128 b)
+{
+return a >= b;
+}
+
+static inline bool int128_lt(Int128 a, Int128 b)
+{
+return a < b;
+}
+
+static inline bool int128_le(Int128 a, Int128 b)
+{
+return a <= b;
+}
+
+static inline bool int128_gt(Int128 a, Int128 b)
+{
+return a > b;
+}
+
+static inline bool int128_nz(Int128 a)
+{
+return a != 0;
+}
+
+static inline Int128 int128_min(Int128 a, Int128 b)
+{
+return a < b ? a : b;
+}
+
+static inline Int128 int128_max(Int128 a, Int128 b)
+{
+return a > b ? a : b;
+}
+
+static inline void int128_addto(Int128 *a, Int128 b)
+{
+*a += b;
+}
+
+static inline void int128_subfrom(Int128 *a, Int128 b)
+{
+*a -= b;
+}
+
+#else /* !CONFIG_INT128 */
 
 typedef struct Int128 Int128;
 
@@ -153,4 +285,5 @@ static inline void int128_subfrom(Int128 *a, Int128 b)
 *a = int128_sub(*a, b);
 }
 
-#endif
+#endif /* CONFIG_INT128 */
+#endif /* INT128_H */
diff --git a/tests/test-int128.c b/tests/test-int128.c
index 4390123..b86a3c7 100644
--- a/tests/test-int128.c
+++ b/tests/test-int128.c
@@ -41,7 +41,7 @@ static Int128 expand(uint32_t x)
 uint64_t l, h;
 l = expand16(x & 65535);
 h = expand16(x >> 16);
-return (Int128) {l, h};
+return (Int128) int128_make128(l, h);
 };
 
 static void test_and(void)
@@ -54,8 +54,8 @@ static void test_and(void)
 Int128 b = expand(tests[j]);
 Int128 r = expand(tests[i] & tests[j]);
 Int128 s = int128_and(a, b);
-g_assert_cmpuint(r.lo, ==, s.lo);
-g_assert_cmpuint(r.hi, ==, s.hi);
+g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s));
+g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s));
 }
 }
 }
@@ -70,8 +70,8 @@ static void test_add(void)
 Int128 b = expand(tests[j]);
 Int128 r = expand(tests[i] + tests[j]);
 Int128 s = int128_add(a, b);
-g_assert_cmpuint(r.lo, ==, s.lo);
-g_assert_cmpuint(r.hi, ==, s.hi);
+g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s));
+g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s));
 }
 }
 }
@@ -86,8 +86,8 @@ static void test_sub(void)
 Int128 b = expand(tests[j]);
 Int128 r = expand(tests[i] - tests[j]);
 Int128 s = int128_sub(a, b);
-g_assert_cmpuint(r.lo, ==, s.lo);
-g_assert_cmpuint(r.hi, ==, s.hi);
+g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s));
+g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s));
 }
 }
 }
@@ -100,8 +100,8 @@ static void test_neg(void)
 Int128 a = expand(tests[i]);
 Int128 r = expand(-tests[i]);
 Int128 s = int128_neg(a);
-g_assert_cmpuint(r.lo, ==, s.lo);
-g_assert_cmpuint(r.hi, ==, s.hi);
+g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s));
+g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s));
 }
 }
 
@@ -180,8 +180,8 @@ test_rshift_one(uint32_t x, int n, uint64_t h, uint64_t l)
 {
 Int128 a = expand(x);
 Int128 r = int128_rshift(a, n);
-g_assert_cmpuint(r.lo, ==, l);
-g_assert_cmpuint(r.hi, ==, h);
+

  1   2   3   4   >