[PATCH v5 2/4] KVM: arm/arm64: Replace lr_used with elrsr

2015-11-03 Thread Pavel Fedin
Since commit ae705930fca6322600690df9dc1c7d0516145a93 ("arm/arm64: KVM:
Keep elrsr/aisr in sync with software model") lr_used is completely
redundant, because together with lr_used we also update elrsr. This allows
us to easily replace lr_used with elrsr, inverting all conditions (because
in elrsr '1' means 'free').

Signed-off-by: Pavel Fedin 
---
 include/kvm/arm_vgic.h |  3 ---
 virt/kvm/arm/vgic-v2.c |  1 +
 virt/kvm/arm/vgic-v3.c |  1 +
 virt/kvm/arm/vgic.c| 37 +
 4 files changed, 15 insertions(+), 27 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index c74dc7b..3936bf8 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -305,9 +305,6 @@ struct vgic_cpu {
unsigned long   *active_shared;
unsigned long   *pend_act_shared;
 
-   /* Bitmap of used/free list registers */
-   DECLARE_BITMAP(lr_used, VGIC_V2_MAX_LRS);
-
/* Number of list registers on this CPU */
int nr_lr;
 
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 8d7b04d..c0f5d7f 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -158,6 +158,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
 * anyway.
 */
vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+   vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
 
/* Get the show on the road... */
vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 7dd5d62..92003cb 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -193,6 +193,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
 * anyway.
 */
vgic_v3->vgic_vmcr = 0;
+   vgic_v3->vgic_elrsr = ~0;
 
/*
 * If we are emulating a GICv3, we do it in an non-GICv2-compatible
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 54233e0..265a410 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -108,6 +108,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu 
*vcpu);
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
+static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu);
 static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
int virt_irq);
 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu);
@@ -691,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio 
*mmio,
 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+   u64 elrsr = vgic_get_elrsr(vcpu);
+   unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
int i;
 
-   for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+   for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) {
struct vgic_lr lr = vgic_get_lr(vcpu, i);
 
/*
@@ -1098,7 +1101,6 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu)
 
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
-   struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
 
/*
@@ -1112,7 +1114,6 @@ static void vgic_retire_lr(int lr_nr, int irq, struct 
kvm_vcpu *vcpu)
 
vlr.state = 0;
vgic_set_lr(vcpu, lr_nr, vlr);
-   clear_bit(lr_nr, vgic_cpu->lr_used);
vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
 }
 
@@ -1127,10 +1128,11 @@ static void vgic_retire_lr(int lr_nr, int irq, struct 
kvm_vcpu *vcpu)
  */
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 {
-   struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+   u64 elrsr = vgic_get_elrsr(vcpu);
+   unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
int lr;
 
-   for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
+   for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
@@ -1187,8 +1189,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, 
int irq,
  */
 bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 {
-   struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+   u64 elrsr = vgic_get_elrsr(vcpu);
+   unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
struct vgic_lr vlr;
int lr;
 
@@ -1200,7 +1203,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 
sgi_source_id, int irq)
kvm_debug("Queue IRQ%d\n", irq);
 
/* Do we have an active interrupt for the same CPUID? */
-   for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+   for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
vlr = vgic_get_lr(vcpu, lr);
if 

[PATCH v5 1/4] KVM: arm/arm64: Remove vgic_irq_lr_map

2015-11-03 Thread Pavel Fedin
Currently we use vgic_irq_lr_map in order to track which LRs hold which
IRQs, and lr_used bitmap in order to track which LRs are used or free.

vgic_irq_lr_map is actually used only in one place for piggy-back
optimization, and can be easily replaced by iteration over lr_used.
Therefore we remove it in order to get prepared for LPI support
introduction. After that, the number of IRQs will grow to at least 16384,
while numbers from 1024 to 8192 are never going to be used. Keeping a
per-IRQ map for that range would be a huge waste of memory.

Signed-off-by: Pavel Fedin 
---
 include/kvm/arm_vgic.h |  3 ---
 virt/kvm/arm/vgic.c| 18 +++---
 2 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 8065801..c74dc7b 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -295,9 +295,6 @@ struct vgic_v3_cpu_if {
 };
 
 struct vgic_cpu {
-   /* per IRQ to LR mapping */
-   u8  *vgic_irq_lr_map;
-
/* Pending/active/both interrupts on this VCPU */
DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS);
DECLARE_BITMAP(active_percpu, VGIC_NR_PRIVATE_IRQS);
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index d4669eb..54233e0 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1113,7 +1113,6 @@ static void vgic_retire_lr(int lr_nr, int irq, struct 
kvm_vcpu *vcpu)
vlr.state = 0;
vgic_set_lr(vcpu, lr_nr, vlr);
clear_bit(lr_nr, vgic_cpu->lr_used);
-   vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
 }
 
@@ -1200,14 +1199,11 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 
sgi_source_id, int irq)
 
kvm_debug("Queue IRQ%d\n", irq);
 
-   lr = vgic_cpu->vgic_irq_lr_map[irq];
-
/* Do we have an active interrupt for the same CPUID? */
-   if (lr != LR_EMPTY) {
+   for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
vlr = vgic_get_lr(vcpu, lr);
-   if (vlr.source == sgi_source_id) {
+   if (vlr.irq == irq && vlr.source == sgi_source_id) {
kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
-   BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
return true;
}
@@ -1220,7 +1216,6 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 
sgi_source_id, int irq)
return false;
 
kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
-   vgic_cpu->vgic_irq_lr_map[irq] = lr;
set_bit(lr, vgic_cpu->lr_used);
 
vlr.irq = irq;
@@ -1484,7 +1479,6 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
clear_bit(lr, vgic_cpu->lr_used);
 
BUG_ON(vlr.irq >= dist->nr_irqs);
-   vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
}
 
/* Check if we still have something up our sleeve... */
@@ -1912,12 +1906,10 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
kfree(vgic_cpu->pending_shared);
kfree(vgic_cpu->active_shared);
kfree(vgic_cpu->pend_act_shared);
-   kfree(vgic_cpu->vgic_irq_lr_map);
vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
vgic_cpu->pending_shared = NULL;
vgic_cpu->active_shared = NULL;
vgic_cpu->pend_act_shared = NULL;
-   vgic_cpu->vgic_irq_lr_map = NULL;
 }
 
 static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
@@ -1928,18 +1920,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, 
int nr_irqs)
vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
-   vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
 
if (!vgic_cpu->pending_shared
|| !vgic_cpu->active_shared
-   || !vgic_cpu->pend_act_shared
-   || !vgic_cpu->vgic_irq_lr_map) {
+   || !vgic_cpu->pend_act_shared) {
kvm_vgic_vcpu_destroy(vcpu);
return -ENOMEM;
}
 
-   memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
-
/*
 * Store the number of LRs per vcpu, so we don't have to go
 * all the way to the distributor structure to find out. Only
-- 
2.4.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 0/4] KVM: arm/arm64: Clean up some obsolete code

2015-11-03 Thread Pavel Fedin
Current KVM code has lots of old redundancies, which can be cleaned up.
This patchset is actually a better alternative to
http://www.spinics.net/lists/arm-kernel/msg430726.html, which allows us to
keep piggy-backed LRs. The idea is based on the fact that our code also
maintains LR state in elrsr, and this information is enough to track LR
usage.

In case of problems this series can be applied partially, each patch is
a complete refactoring step on its own.

Thanks to Andre Przywara for pinpointing some 4.3+ specifics.

This version has been tested on SMDK5410 development board
(Exynos5410 SoC).

v4 => v5:
- Split up the first patch into two, for simpler bisection.

v3 => v4:
- Reordered changes for purpose of better understanding and bisection. All
  changes related to vgic_retire_lr() are gathered in one patch now.

v2 => v3:
- Removed two unused variables in __kvm_vgic_flush_hwstate(), overlooked
  leftover from v1.

v1 => v2:
- Rebased to kvmarm/next of 23.10.2015.
- Do not use vgic_retire_lr() for initializing ELRSR bitmask, because now
  it also handles pushback of PENDING state, use direct initialization
  instead (copied from Andre's patchset).
- Took more care about vgic_retire_lr(), which has deserved own patch.

Pavel Fedin (4):
  KVM: arm/arm64: Remove vgic_irq_lr_map
  KVM: arm/arm64: Replace lr_used with elrsr
  KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings
  KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr()

 include/kvm/arm_vgic.h |   7 
 virt/kvm/arm/vgic-v2.c |   6 +--
 virt/kvm/arm/vgic-v3.c |   6 +--
 virt/kvm/arm/vgic.c| 104 +
 4 files changed, 29 insertions(+), 94 deletions(-)

-- 
2.4.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 4/4] KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr()

2015-11-03 Thread Pavel Fedin
Now we see that vgic_set_lr() and vgic_sync_lr_elrsr() are always used
together. Merge them into one function, saving a second vgic_ops
dereference every time.

Signed-off-by: Pavel Fedin 
---
 include/kvm/arm_vgic.h |  1 -
 virt/kvm/arm/vgic-v2.c |  5 -
 virt/kvm/arm/vgic-v3.c |  5 -
 virt/kvm/arm/vgic.c| 14 ++
 4 files changed, 2 insertions(+), 23 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 3936bf8..f62addc 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -112,7 +112,6 @@ struct vgic_vmcr {
 struct vgic_ops {
struct vgic_lr  (*get_lr)(const struct kvm_vcpu *, int);
void(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
-   void(*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
void(*clear_eisr)(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index c0f5d7f..ff02f08 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -79,11 +79,7 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT);
 
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
-}
 
-static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
- struct vgic_lr lr_desc)
-{
if (!(lr_desc.state & LR_STATE_MASK))
vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
else
@@ -167,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
 static const struct vgic_ops vgic_v2_ops = {
.get_lr = vgic_v2_get_lr,
.set_lr = vgic_v2_set_lr,
-   .sync_lr_elrsr  = vgic_v2_sync_lr_elrsr,
.get_elrsr  = vgic_v2_get_elrsr,
.get_eisr   = vgic_v2_get_eisr,
.clear_eisr = vgic_v2_clear_eisr,
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 92003cb..487d635 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -112,11 +112,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
}
 
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
-}
 
-static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
- struct vgic_lr lr_desc)
-{
if (!(lr_desc.state & LR_STATE_MASK))
vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
else
@@ -212,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
 static const struct vgic_ops vgic_v3_ops = {
.get_lr = vgic_v3_get_lr,
.set_lr = vgic_v3_set_lr,
-   .sync_lr_elrsr  = vgic_v3_sync_lr_elrsr,
.get_elrsr  = vgic_v3_get_elrsr,
.get_eisr   = vgic_v3_get_eisr,
.clear_eisr = vgic_v3_clear_eisr,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 96e45f3..fe451d4 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1032,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
vgic_ops->set_lr(vcpu, lr, vlr);
 }
 
-static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
-  struct vgic_lr vlr)
-{
-   vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
-}
-
 static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
 {
return vgic_ops->get_elrsr(vcpu);
@@ -1100,7 +1094,6 @@ static void vgic_retire_lr(int lr_nr, struct kvm_vcpu 
*vcpu)
 
vlr.state = 0;
vgic_set_lr(vcpu, lr_nr, vlr);
-   vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
 }
 
 /*
@@ -1162,7 +1155,6 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, 
int irq,
}
 
vgic_set_lr(vcpu, lr_nr, vlr);
-   vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
 }
 
 /*
@@ -1340,8 +1332,6 @@ static int process_queued_irq(struct kvm_vcpu *vcpu,
vlr.hwirq = 0;
vgic_set_lr(vcpu, lr, vlr);
 
-   vgic_sync_lr_elrsr(vcpu, lr, vlr);
-
return pending;
 }
 
@@ -1442,8 +1432,6 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
bool level_pending;
 
level_pending = vgic_process_maintenance(vcpu);
-   elrsr = vgic_get_elrsr(vcpu);
-   elrsr_ptr = u64_to_bitmask(&elrsr);
 
/* Deal with HW interrupts, and clear mappings for empty LRs */
for (lr = 0; lr < vgic->nr_lr; lr++) {
@@ -1454,6 +1442,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
}
 
/* Check if we still have something up our sleeve... */
+   elrsr = vgic_get_elrsr(vcpu);
+   elrsr_ptr = u64_to_bitmask(&elrsr);
pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
if (level_pending || pending < vgic->nr_lr)
set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
-- 
2.4.4

--
To 

Re: [PATCH/RFC 0/4] dma ops and virtio

2015-11-03 Thread Christian Borntraeger
Am 02.11.2015 um 21:23 schrieb Andy Lutomirski:
> On Mon, Nov 2, 2015 at 3:16 AM, Cornelia Huck  
> wrote:
>> On Fri, 30 Oct 2015 13:33:07 -0700
>> Andy Lutomirski  wrote:
>>
>>> On Fri, Oct 30, 2015 at 1:25 AM, Cornelia Huck  
>>> wrote:
 On Thu, 29 Oct 2015 15:50:38 -0700
 Andy Lutomirski  wrote:

> Progress!  After getting that sort-of-working, I figured out what was
> wrong with my earlier command, and I got that working, too.  Now I
> get:
>
> qemu-system-s390x -fsdev
> local,id=virtfs1,path=/,security_model=none,readonly -device
> virtio-9p-ccw,fsdev=virtfs1,mount_tag=/dev/root -M s390-ccw-virtio
> -nodefaults -device sclpconsole,chardev=console -parallel none -net
> none -echr 1 -serial none -chardev stdio,id=console,signal=off,mux=on
> -serial chardev:console -mon chardev=console -vga none -display none
> -kernel arch/s390/boot/bzImage -append
> 'init=/home/luto/devel/virtme/virtme/guest/virtme-init
> psmouse.proto=exps "virtme_stty_con=rows 24 cols 150 iutf8"
> TERM=xterm-256color rootfstype=9p
> rootflags=ro,version=9p2000.L,trans=virtio,access=any
> raid=noautodetect debug'

 The commandline looks sane AFAICS.

 (...)

> vrfy: device 0.0.: rc=0 pgroup=0 mpath=0 vpm=80
> virtio_ccw 0.0.: Failed to set online: -5
>
> ^^^ bad news!

 I'd like to see where in the onlining process this fails. Could you set
 up qemu tracing for css_* and virtio_ccw_* (instructions in
 qemu/docs/tracing.txt)?
>>>
>>> I have a file called events that contains:
>>>
>>> css_*
>>> virtio_ccw_*
>>>
>>> pointing -trace events= at it results in a trace- file that's 549
>>> bytes long and contains nothing.  Are wildcards not as well-supported
>>> as the docs suggest?
>>
>> Just tried it, seemed to work for me as expected. And as your messages
>> indicate, at least some of the css tracepoints are guaranteed to be
>> hit. Odd.
>>
>> Can you try the following sophisticated printf debug patch?
>>
>> diff --git a/hw/s390x/css.c b/hw/s390x/css.c
>> index c033612..6a87bd6 100644
>> --- a/hw/s390x/css.c
>> +++ b/hw/s390x/css.c
>> @@ -308,6 +308,8 @@ static int css_interpret_ccw(SubchDev *sch, hwaddr 
>> ccw_addr)
>>  sch->ccw_no_data_cnt++;
>>  }
>>
>> +fprintf(stderr, "CH DBG: %s: cmd_code=%x\n", __func__, ccw.cmd_code);
>> +
>>  /* Look at the command. */
>>  switch (ccw.cmd_code) {
>>  case CCW_CMD_NOOP:
>> @@ -375,6 +377,7 @@ static int css_interpret_ccw(SubchDev *sch, hwaddr 
>> ccw_addr)
>>  }
>>  break;
>>  }
>> +fprintf(stderr, "CH DBG: %s: ret=%d\n", __func__, ret);
>>  sch->last_cmd = ccw;
>>  sch->last_cmd_valid = true;
>>  if (ret == 0) {
>>
>>
 Which qemu version is this, btw.?

>>>
>>> git from yesterday.
>>
>> Hm. Might be worth trying the s390-ccw-virtio-2.4 machine instead.
>>
> 
> No change.
> 
> With s390-ccw-virtio-2.4, I get:
> 
> Initializing cgroup subsys cpuset
> Initializing cgroup subsys cpu
> Initializing cgroup subsys cpuacct
> Linux version 4.3.0-rc7-8-gff230d6ec6b2
> (l...@amaluto.corp.amacapital.net) (gcc version 5.1.1 20150618 (Red
> Hat Cross 5.1.1-3) (GCC) ) #344 SMP Fri Oct 30 13:16:13 PDT 2015
> setup: Linux is running under KVM in 64-bit mode
> setup: Max memory size: 128MB
> Zone ranges:
>   DMA  [mem 0x-0x7fff]
>   Normal   empty
> Movable zone start for each node
> Early memory node ranges
>   node   0: [mem 0x-0x07ff]
> Initmem setup node 0 [mem 0x-0x07ff]
> On node 0 totalpages: 32768
>   DMA zone: 512 pages used for memmap
>   DMA zone: 0 pages reserved
>   DMA zone: 32768 pages, LIFO batch:7
> PERCPU: Embedded 466 pages/cpu @07605000 s1868032 r8192 d32512 
> u1908736
> pcpu-alloc: s1868032 r8192 d32512 u1908736 alloc=466*4096
> pcpu-alloc: [0] 0 [0] 1
> Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 32256
> Kernel command line:
> init=/home/luto/devel/virtme/virtme/guest/virtme-init
> psmouse.proto=exps "virtme_stty_con=rows 45 cols 150 iutf8"
> TERM=xterm-256color rootfstype=9p
> rootflags=version=9p2000.L,trans=virtio,access=any raid=noautodetect
> ro debug
> PID hash table entries: 512 (order: 0, 4096 bytes)
> Dentry cache hash table entries: 16384 (order: 5, 131072 bytes)
> Inode-cache hash table entries: 8192 (order: 4, 65536 bytes)
> Memory: 92520K/131072K available (8255K kernel code, 802K rwdata,


can you send your kernel config?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 3/4] KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings

2015-11-03 Thread Pavel Fedin
1. Remove unnecessary 'irq' argument, because irq number can be retrieved
   from the LR.
2. Since commit cff9211eb1a1f58ce7f5a2d596b617928fd4be0e ("arm/arm64: KVM:
   Fix arch timer behavior for disabled interrupts") LR_STATE_PENDING is
   queued back by vgic_retire_lr() itself. Also, it clears vlr.state
   itself. Therefore, we remove the same, now duplicated, check with all
   accompanying bit manipulations from vgic_unqueue_irqs().
3. vgic_retire_lr() is always accompanied by vgic_irq_clear_queued(). Since
   it already does more than just clearing the LR, move
   vgic_irq_clear_queued() inside of it.

Signed-off-by: Pavel Fedin 
---
 virt/kvm/arm/vgic.c | 37 ++---
 1 file changed, 10 insertions(+), 27 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 265a410..96e45f3 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -105,7 +105,7 @@
 #include "vgic.h"
 
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
-static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
+static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
 static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu);
@@ -717,30 +717,14 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 * interrupt then move the active state to the
 * distributor tracking bit.
 */
-   if (lr.state & LR_STATE_ACTIVE) {
+   if (lr.state & LR_STATE_ACTIVE)
vgic_irq_set_active(vcpu, lr.irq);
-   lr.state &= ~LR_STATE_ACTIVE;
-   }
 
/*
 * Reestablish the pending state on the distributor and the
-* CPU interface.  It may have already been pending, but that
-* is fine, then we are only setting a few bits that were
-* already set.
+* CPU interface and mark the LR as free for other use.
 */
-   if (lr.state & LR_STATE_PENDING) {
-   vgic_dist_irq_set_pending(vcpu, lr.irq);
-   lr.state &= ~LR_STATE_PENDING;
-   }
-
-   vgic_set_lr(vcpu, i, lr);
-
-   /*
-* Mark the LR as free for other use.
-*/
-   BUG_ON(lr.state & LR_STATE_MASK);
-   vgic_retire_lr(i, lr.irq, vcpu);
-   vgic_irq_clear_queued(vcpu, lr.irq);
+   vgic_retire_lr(i, vcpu);
 
/* Finally update the VGIC state. */
vgic_update_state(vcpu->kvm);
@@ -1099,16 +1083,18 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu)
vgic_ops->enable(vcpu);
 }
 
-static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
+static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu)
 {
struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
 
+   vgic_irq_clear_queued(vcpu, vlr.irq);
+
/*
 * We must transfer the pending state back to the distributor before
 * retiring the LR, otherwise we may loose edge-triggered interrupts.
 */
if (vlr.state & LR_STATE_PENDING) {
-   vgic_dist_irq_set_pending(vcpu, irq);
+   vgic_dist_irq_set_pending(vcpu, vlr.irq);
vlr.hwirq = 0;
}
 
@@ -1135,11 +1121,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu 
*vcpu)
for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
-   if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
-   vgic_retire_lr(lr, vlr.irq, vcpu);
-   if (vgic_irq_is_queued(vcpu, vlr.irq))
-   vgic_irq_clear_queued(vcpu, vlr.irq);
-   }
+   if (!vgic_irq_is_enabled(vcpu, vlr.irq))
+   vgic_retire_lr(lr, vcpu);
}
 }
 
-- 
2.4.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: VMX: Fix commit which broke PML

2015-11-03 Thread Paolo Bonzini


On 03/11/2015 06:49, Kai Huang wrote:
> I found PML was broken since below commit:
> 
>   commit feda805fe7c4ed9cf78158e73b1218752e3b4314
>   Author: Xiao Guangrong 
>   Date:   Wed Sep 9 14:05:55 2015 +0800
> 
>   KVM: VMX: unify SECONDARY_VM_EXEC_CONTROL update
> 
>   Unify the update in vmx_cpuid_update()
> 
>   Signed-off-by: Xiao Guangrong 
>   [Rewrite to use vmcs_set_secondary_exec_control. - Paolo]
>   Signed-off-by: Paolo Bonzini 
> 
> The reason is PML after above commit vmx_cpuid_update calls
> vmx_secondary_exec_control, in which PML is disabled unconditionally, as PML 
> is
> enabled in creating vcpu. Therefore if vcpu_cpuid_update is called after vcpu 
> is
> created, PML will be disabled unexpectedly while log-dirty code still think 
> PML
> is used. Actually looks calling vmx_secondary_exec_control in vmx_cpuid_update
> is likely to break any VMX features that is enabled/disabled on demand by
> updating SECONDARY_VM_EXEC_CONTROL, if vmx_cpuid_update is called between the
> feature is enabled and disabled.
> 
> Fix this by calling vmcs_read32 to read out SECONDARY_VM_EXEC_CONTROL 
> directly.

vmx_cpuid_update() is meant to be mostly idempotent; the parts that
depend on the current VMCS configuration are hidden in
vmcs_set_secondary_exec_control.  So a better fix would be to add
SECONDARY_EXEC_ENABLE_PML to vmcs_set_secondary_exec_control's
"mask" variable.  However, you can see from the comment:

/*
 * These bits in the secondary execution controls field
 * are dynamic, the others are mostly based on the hypervisor
 * architecture and the guest's CPUID. Do not touch the
 * dynamic bits.
 */

that even this is not the optimal fix.  SECONDARY_EXEC_ENABLE_PML is
either always set or always clear, so it shouldn't be in "mask".

Instead, it should be in vmcs_config.cpu_based_2nd_exec_ctrl.  It isn't
because my review didn't notice this remnant of your original
implementation, which dynamically enabled/disabled PML.

In fact, cpu_has_vmx_pml() expects SECONDARY_EXEC_ENABLE_PML to be set
in vmcs_config.cpu_based_2nd_exec_ctrl, so it is a bit confusing to
remove the bit unconditionally in vmx_secondary_exec_control!

So I think SECONDARY_EXEC_ENABLE_PML should not be removed unconditionally
from exec_control in vmx_secondary_exec_control; the removal should be
conditional on !enable_pml, like we do for e.g. EPT or VPID.  If you do this,
vmx_enable_pml and vmx_disable_pml need not touch SECONDARY_VM_EXEC_CONTROL
anymore.  Do you agree?  If so, can you prepare a patch along these lines?

(Since you are at it, perhaps you can rename vmx_enable_pml and
vmx_disable_pml, since they will only allocate and free the PML page).

Thanks for reporting the issue!

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/3] KVM: x86: simplify RSM into 64-bit protected mode

2015-11-03 Thread Paolo Bonzini


On 03/11/2015 10:40, Laszlo Ersek wrote:
> On 11/02/15 10:32, Paolo Bonzini wrote:
>>
>>
>> On 31/10/2015 20:50, Laszlo Ersek wrote:
>>> Tested-by: Laszlo Ersek 
>>
>> Thanks Laszlo, I applied patches 1 and 2 (since your "part 2" never was :)).
>>
>> Paolo
>>
> 
> Thanks.
> 
> Since you can rebase the queue freely, can you please also add:
> 
> Reported-by: Laszlo Ersek 
> 
> to Radim's patch "KVM: x86: handle SMBASE as physical address in RSM"?

Sure, will do.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: fix eflags state following processor init/reset

2015-11-03 Thread Paolo Bonzini


On 28/10/2015 09:10, Nadav Amit wrote:
> Here are my 5 cents. Note that vmx_vcpu_reset calls:
> 
>   vmcs_writel(GUEST_RFLAGS, 0x02);
> 
> (And the RFLAGS value is not cached by KVM, so no consistency problem should
> occur.)
> 
> You may want to change the value into constant or call a wrapper function
> for setting RFLAGS, but I don’t see something broken in the functionality.

I agree.  Wanpeng, if this is just a cleanup, can you send v2 that
removes or modifies the existing call to vmcs_writel?  If there is a
bug, can you write a unit test for it?  It should be possible to test
for the problem using INIT+SIPI on an AP.

Thanks,

Paolo

> Regards,
> Nadav
> 
> Wanpeng Li  wrote:
> 
>> Ping, :-)
>> On 10/21/15 2:28 PM, Wanpeng Li wrote:
>>> Reference SDM 3.4.3:
>>>
>>> Following initialization of the processor (either by asserting the
>>> RESET pin or the INIT pin), the state of the EFLAGS register is
>>> 0002H.
>>>
>>> However, the eflags fixed bit is not set and other bits are also not
>>> cleared during the init/reset in kvm.
>>>
>>> This patch fix it by set eflags register to 0002H following
>>> initialization of the processor.
>>>
>>> Signed-off-by: Wanpeng Li 
>>> ---
>>>  arch/x86/kvm/vmx.c | 1 +
>>>  1 file changed, 1 insertion(+)
>>>
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index b680c2e..326f6ea 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -4935,6 +4935,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, 
>>> bool init_event)
>>> vmx_set_efer(vcpu, 0);
>>> vmx_fpu_activate(vcpu);
>>> update_exception_bitmap(vcpu);
>>> +   vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
>>> vpid_sync_context(vmx->vpid);
>>>  }
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 27/35] nvdimm acpi: build ACPI nvdimm devices

2015-11-03 Thread Igor Mammedov
On Mon,  2 Nov 2015 17:13:29 +0800
Xiao Guangrong  wrote:

> NVDIMM devices is defined in ACPI 6.0 9.20 NVDIMM Devices
> 
> There is a root device under \_SB and specified NVDIMM devices are under the
> root device. Each NVDIMM device has _ADR which returns its handle used to
> associate MEMDEV structure in NFIT
> 
> We reserve handle 0 for root device. In this patch, we save handle, handle,
> arg1 and arg2 to dsm memory. Arg3 is conditionally saved in later patch
> 
> Signed-off-by: Xiao Guangrong 
> ---
>  hw/acpi/nvdimm.c | 184 
> +++
>  1 file changed, 184 insertions(+)
> 
> diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
> index dd84e5f..53ed675 100644
> --- a/hw/acpi/nvdimm.c
> +++ b/hw/acpi/nvdimm.c
> @@ -368,6 +368,15 @@ static void nvdimm_build_nfit(GSList *device_list, 
> GArray *table_offsets,
>  g_array_free(structures, true);
>  }
>  
> +struct NvdimmDsmIn {
> +uint32_t handle;
> +uint32_t revision;
> +uint32_t function;
> +   /* the remaining size in the page is used by arg3. */
> +uint8_t arg3[0];
> +} QEMU_PACKED;
> +typedef struct NvdimmDsmIn NvdimmDsmIn;
> +
>  static uint64_t
>  nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
>  {
> @@ -377,6 +386,7 @@ nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
>  static void
>  nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
>  {
> +fprintf(stderr, "BUG: we never write DSM notification IO Port.\n");
it doesn't seem like this hunk belongs here

>  }
>  
>  static const MemoryRegionOps nvdimm_dsm_ops = {
> @@ -402,6 +412,179 @@ void nvdimm_init_acpi_state(MemoryRegion *memory, 
> MemoryRegion *io,
>  memory_region_add_subregion(io, NVDIMM_ACPI_IO_BASE, >io_mr);
>  }
>  
> +#define BUILD_STA_METHOD(_dev_, _method_)  \
> +do {   \
> +_method_ = aml_method("_STA", 0);  \
> +aml_append(_method_, aml_return(aml_int(0x0f)));   \
> +aml_append(_dev_, _method_);   \
> +} while (0)
_STA doesn't have any logic here so drop macro and just
replace its call sites with:

aml_append(foo_dev, aml_name_decl("_STA", aml_int(0xf)));


> +
> +#define BUILD_DSM_METHOD(_dev_, _method_, _handle_, _uuid_)\
> +do {   \
> +Aml *ifctx, *uuid; \
> +_method_ = aml_method("_DSM", 4);  \
> +/* check UUID if it is we expect, return the errorcode if not.*/   \
> +uuid = aml_touuid(_uuid_); \
> +ifctx = aml_if(aml_lnot(aml_equal(aml_arg(0), uuid))); \
> +aml_append(ifctx, aml_return(aml_int(1 /* Not Supported */))); \
> +aml_append(method, ifctx); \
> +aml_append(method, aml_return(aml_call4("NCAL", aml_int(_handle_), \
> +   aml_arg(1), aml_arg(2), aml_arg(3;  \
> +aml_append(_dev_, _method_);   \
> +} while (0)
> +
> +#define BUILD_FIELD_UNIT_SIZE(_field_, _byte_, _name_) \
> +aml_append(_field_, aml_named_field(_name_, (_byte_) * BITS_PER_BYTE))
> +
> +#define BUILD_FIELD_UNIT_STRUCT(_field_, _s_, _f_, _name_) \
> +BUILD_FIELD_UNIT_SIZE(_field_, sizeof(typeof_field(_s_, _f_)), _name_)
> +
> +static void build_nvdimm_devices(GSList *device_list, Aml *root_dev)
> +{
> +for (; device_list; device_list = device_list->next) {
> +NVDIMMDevice *nvdimm = device_list->data;
> +int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP,
> +   NULL);
> +uint32_t handle = nvdimm_slot_to_handle(slot);
> +Aml *dev, *method;
> +
> +dev = aml_device("NV%02X", slot);
> +aml_append(dev, aml_name_decl("_ADR", aml_int(handle)));
> +
> +BUILD_STA_METHOD(dev, method);
> +
> +/*
> + * Chapter 4: _DSM Interface for NVDIMM Device (non-root) - Example
> + * in DSM Spec Rev1.
> + */
> +BUILD_DSM_METHOD(dev, method,
> + handle /* NVDIMM Device Handle */,
> + "4309AC30-0D11-11E4-9191-0800200C9A66"
> + /* UUID for NVDIMM Devices. */);
this will add N-bytes * #NVDIMMS in worst case.
Please drop macro and just consolidate this method into _DSM method of parent 
scope
and then call it from here like this:
   Method(_DSM, 4)
   Return(^_DSM(Arg[0-3]))

> +
> +aml_append(root_dev, dev);
> +}
> +}
> +
> +static void nvdimm_build_acpi_devices(GSList 

Re: [kvm-unit-tests PATCH 00/14] ppc64: initial drop

2015-11-03 Thread Paolo Bonzini


On 03/11/2015 08:08, Thomas Huth wrote:
> On 03/08/15 16:41, Andrew Jones wrote:
>> > This series is the first series of a series of series that will
>> > bring support to kvm-unit-tests for ppc64, and eventually ppc64le.
>  Hi Andrew,
> 
> may I ask about the current state of ppc64 support in the
> kvm-unit-tests? Is there a newer version available than the one you
> posted three months ago?

I've been a slob with all the kvm-unit-tests patches.  Andrew, can you
send a single submission of all the patches, so that I can review them
and apply them?

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: VMX: fix SMEP and SMAP without EPT

2015-11-03 Thread Paolo Bonzini


On 02/11/2015 22:20, Radim Krčmář wrote:
> The comment in code had it mostly right, but we enable paging for
> emulated real mode regardless of EPT.
> 
> Without EPT (which implies emulated real mode), secondary VCPUs won't
> start unless we disable SM[AE]P when the guest doesn't use paging.
> 
> Signed-off-by: Radim Krčmář 
> ---
>  arch/x86/kvm/vmx.c | 19 ++-
>  1 file changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index b680c2e0e8a3..ab598558a7a4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3788,20 +3788,21 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, 
> unsigned long cr4)
>   if (!is_paging(vcpu)) {
>   hw_cr4 &= ~X86_CR4_PAE;
>   hw_cr4 |= X86_CR4_PSE;
> - /*
> -  * SMEP/SMAP is disabled if CPU is in non-paging mode
> -  * in hardware. However KVM always uses paging mode to
> -  * emulate guest non-paging mode with TDP.
> -  * To emulate this behavior, SMEP/SMAP needs to be
> -  * manually disabled when guest switches to non-paging
> -  * mode.
> -  */
> - hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
>   } else if (!(cr4 & X86_CR4_PAE)) {
>   hw_cr4 &= ~X86_CR4_PAE;
>   }
>   }
>  
> + if (!enable_unrestricted_guest && !is_paging(vcpu))
> + /*
> +  * SMEP/SMAP is disabled if CPU is in non-paging mode in
> +  * hardware.  However KVM always uses paging mode without
> +  * unrestricted guest.
> +  * To emulate this behavior, SMEP/SMAP needs to be manually
> +  * disabled when guest switches to non-paging mode.
> +  */
> + hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
> +
>   vmcs_writel(CR4_READ_SHADOW, cr4);
>   vmcs_writel(GUEST_CR4, hw_cr4);
>   return 0;
> 

Applied with Cc: sta...@vger.kernel.org.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 09/35] exec: allow file_ram_alloc to work on file

2015-11-03 Thread Igor Mammedov
On Mon,  2 Nov 2015 17:13:11 +0800
Xiao Guangrong  wrote:

> Currently, file_ram_alloc() only works on directory - it creates a file
> under @path and do mmap on it
> 
> This patch tries to allow it to work on file directly, if @path is a
> directory it works as before, otherwise it treats @path as the target
> file then directly allocate memory from it
Paolo has just queued
https://lists.gnu.org/archive/html/qemu-devel/2015-10/msg06513.html
perhaps that's what you can reuse here.
> 
> Signed-off-by: Xiao Guangrong 
> ---
>  exec.c | 80 
> ++
>  1 file changed, 51 insertions(+), 29 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 9075f4d..db0fdaf 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1174,14 +1174,60 @@ void qemu_mutex_unlock_ramlist(void)
>  }
>  
>  #ifdef __linux__
> +static bool path_is_dir(const char *path)
> +{
> +struct stat fs;
> +
> +return stat(path, &fs) == 0 && S_ISDIR(fs.st_mode);
> +}
> +
> +static int open_ram_file_path(RAMBlock *block, const char *path, size_t size)
> +{
> +char *filename;
> +char *sanitized_name;
> +char *c;
> +int fd;
> +
> +if (!path_is_dir(path)) {
> +int flags = (block->flags & RAM_SHARED) ? O_RDWR : O_RDONLY;
> +
> +flags |= O_EXCL;
> +return open(path, flags);
> +}
> +
> +/* Make name safe to use with mkstemp by replacing '/' with '_'. */
> +sanitized_name = g_strdup(memory_region_name(block->mr));
> +for (c = sanitized_name; *c != '\0'; c++) {
> +if (*c == '/') {
> +*c = '_';
> +}
> +}
> +filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
> +   sanitized_name);
> +g_free(sanitized_name);
> +fd = mkstemp(filename);
> +if (fd >= 0) {
> +unlink(filename);
> +/*
> + * ftruncate is not supported by hugetlbfs in older
> + * hosts, so don't bother bailing out on errors.
> + * If anything goes wrong with it under other filesystems,
> + * mmap will fail.
> + */
> +if (ftruncate(fd, size)) {
> +perror("ftruncate");
> +}
> +}
> +g_free(filename);
> +
> +return fd;
> +}
> +
>  static void *file_ram_alloc(RAMBlock *block,
>  ram_addr_t memory,
>  const char *path,
>  Error **errp)
>  {
> -char *filename;
> -char *sanitized_name;
> -char *c;
>  void *area;
>  int fd;
>  uint64_t pagesize;
> @@ -1212,38 +1258,14 @@ static void *file_ram_alloc(RAMBlock *block,
>  goto error;
>  }
>  
> -/* Make name safe to use with mkstemp by replacing '/' with '_'. */
> -sanitized_name = g_strdup(memory_region_name(block->mr));
> -for (c = sanitized_name; *c != '\0'; c++) {
> -if (*c == '/')
> -*c = '_';
> -}
> -
> -filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
> -   sanitized_name);
> -g_free(sanitized_name);
> +memory = ROUND_UP(memory, pagesize);
>  
> -fd = mkstemp(filename);
> +fd = open_ram_file_path(block, path, memory);
>  if (fd < 0) {
>  error_setg_errno(errp, errno,
>   "unable to create backing store for path %s", path);
> -g_free(filename);
>  goto error;
>  }
> -unlink(filename);
> -g_free(filename);
> -
> -memory = ROUND_UP(memory, pagesize);
> -
> -/*
> - * ftruncate is not supported by hugetlbfs in older
> - * hosts, so don't bother bailing out on errors.
> - * If anything goes wrong with it under other filesystems,
> - * mmap will fail.
> - */
> -if (ftruncate(fd, memory)) {
> -perror("ftruncate");
> -}
>  
>  area = qemu_ram_mmap(fd, memory, pagesize, block->flags & RAM_SHARED);
>  if (area == MAP_FAILED) {

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 9/9] kvm/x86: Hyper-V kvm exit

2015-11-03 Thread Paolo Bonzini


On 22/10/2015 18:10, Andrey Smetanin wrote:
> A new vcpu exit is introduced to notify the userspace of the
> changes in Hyper-V SynIC configuration triggered by guest writing to the
> corresponding MSRs.
> 
> Changes v3:
> * added KVM_EXIT_HYPERV types and structs notes into docs
> 
> Signed-off-by: Andrey Smetanin 
> Reviewed-by: Roman Kagan 
> Signed-off-by: Denis V. Lunev 
> CC: Vitaly Kuznetsov 
> CC: "K. Y. Srinivasan" 
> CC: Gleb Natapov 
> CC: Paolo Bonzini 
> CC: Roman Kagan 
> 
> ---
>  Documentation/virtual/kvm/api.txt | 22 ++
>  arch/x86/include/asm/kvm_host.h   |  1 +
>  arch/x86/kvm/hyperv.c | 17 +
>  arch/x86/kvm/x86.c|  6 ++
>  include/linux/kvm_host.h  |  1 +
>  include/uapi/linux/kvm.h  | 17 +
>  6 files changed, 64 insertions(+)
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index 8710418..a6858eb 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3337,6 +3337,28 @@ the userspace IOAPIC should process the EOI and 
> retrigger the interrupt if
>  it is still asserted.  Vector is the LAPIC interrupt vector for which the
>  EOI was received.
>  
> + struct kvm_hyperv_exit {
> +#define KVM_EXIT_HYPERV_SYNIC  1
> + __u32 type;
> + union {
> + struct {
> + __u32 msr;
> + __u64 control;
> + __u64 evt_page;
> + __u64 msg_page;
> + } synic;
> + } u;
> + };
> + /* KVM_EXIT_HYPERV */
> +struct kvm_hyperv_exit hyperv;
> +Indicates that the VCPU exits into userspace to process some tasks
> +related to Hyper-V emulation.
> +Valid values for 'type' are:
> + KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about
> +Hyper-V SynIC state change. Notification is used to remap SynIC
> +event/message pages and to enable/disable SynIC messages/events processing
> +in userspace.
> +
>   /* Fix the size of the union. */
>   char padding[256];
>   };
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 8434f88..54c90d3 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -392,6 +392,7 @@ struct kvm_vcpu_hv {
>   u64 hv_vapic;
>   s64 runtime_offset;
>   struct kvm_vcpu_hv_synic synic;
> + struct kvm_hyperv_exit exit;
>  };
>  
>  struct kvm_vcpu_arch {
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 8ff71f3..9443920 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -129,6 +129,20 @@ static void kvm_hv_notify_acked_sint(struct kvm_vcpu 
> *vcpu, u32 sint)
>   srcu_read_unlock(&kvm->irq_srcu, idx);
>  }
>  
> +static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
> +{
> + struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
> + struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
> +
> + hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
> + hv_vcpu->exit.u.synic.msr = msr;
> + hv_vcpu->exit.u.synic.control = synic->control;
> + hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
> + hv_vcpu->exit.u.synic.msg_page = synic->msg_page;
> +
> + kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
> +}
> +
>  static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
>u32 msr, u64 data, bool host)
>  {
> @@ -141,6 +155,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
>   switch (msr) {
>   case HV_X64_MSR_SCONTROL:
>   synic->control = data;
> + synic_exit(synic, msr);

Another note.  I am getting:

EAX= EBX= ECX= EDX=0663
ESI= EDI= EBP= ESP=
EIP=fff0 EFL=0002 [---] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =   9300
CS =f000   9b00
SS =   9300
DS =   9300
FS =   9300
GS =   9300
LDT=   8200
TR =   8b00
GDT=  
IDT=  
CR0=6010 CR2= CR3= CR4=
DR0= DR1= DR2=
DR3=
DR6=0ff0 DR7=0400
EFER=
Code=90 90 90 90 eb c3 90 90 90 90 90 90 00 00 00 00 56 54 46 00 <90> 90
eb ac 90 90 90 90 90 90 90 90 90 90 90 90 00 00 00 00 00 00 00 00 00 00
00 00 00 00

if I run a patched QEMU but I *do not* enable the synthetic 

Re: [PATCH v3 9/9] kvm/x86: Hyper-V kvm exit

2015-11-03 Thread Andrey Smetanin



On 11/03/2015 04:28 PM, Paolo Bonzini wrote:



On 22/10/2015 18:10, Andrey Smetanin wrote:

A new vcpu exit is introduced to notify the userspace of the
changes in Hyper-V SynIC configuration triggered by guest writing to the
corresponding MSRs.

Changes v3:
* added KVM_EXIT_HYPERV types and structs notes into docs

Signed-off-by: Andrey Smetanin 
Reviewed-by: Roman Kagan 
Signed-off-by: Denis V. Lunev 
CC: Vitaly Kuznetsov 
CC: "K. Y. Srinivasan" 
CC: Gleb Natapov 
CC: Paolo Bonzini 
CC: Roman Kagan 

---
  Documentation/virtual/kvm/api.txt | 22 ++
  arch/x86/include/asm/kvm_host.h   |  1 +
  arch/x86/kvm/hyperv.c | 17 +
  arch/x86/kvm/x86.c|  6 ++
  include/linux/kvm_host.h  |  1 +
  include/uapi/linux/kvm.h  | 17 +
  6 files changed, 64 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 8710418..a6858eb 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3337,6 +3337,28 @@ the userspace IOAPIC should process the EOI and 
retrigger the interrupt if
  it is still asserted.  Vector is the LAPIC interrupt vector for which the
  EOI was received.

+   struct kvm_hyperv_exit {
+#define KVM_EXIT_HYPERV_SYNIC  1
+   __u32 type;
+   union {
+   struct {
+   __u32 msr;
+   __u64 control;
+   __u64 evt_page;
+   __u64 msg_page;
+   } synic;
+   } u;
+   };
+   /* KVM_EXIT_HYPERV */
+struct kvm_hyperv_exit hyperv;
+Indicates that the VCPU exits into userspace to process some tasks
+related to Hyper-V emulation.
+Valid values for 'type' are:
+   KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about
+Hyper-V SynIC state change. Notification is used to remap SynIC
+event/message pages and to enable/disable SynIC messages/events processing
+in userspace.
+
/* Fix the size of the union. */
char padding[256];
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8434f88..54c90d3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -392,6 +392,7 @@ struct kvm_vcpu_hv {
u64 hv_vapic;
s64 runtime_offset;
struct kvm_vcpu_hv_synic synic;
+   struct kvm_hyperv_exit exit;
  };

  struct kvm_vcpu_arch {
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 8ff71f3..9443920 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -129,6 +129,20 @@ static void kvm_hv_notify_acked_sint(struct kvm_vcpu 
*vcpu, u32 sint)
srcu_read_unlock(&kvm->irq_srcu, idx);
  }

+static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
+{
+   struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
+   struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
+
+   hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
+   hv_vcpu->exit.u.synic.msr = msr;
+   hv_vcpu->exit.u.synic.control = synic->control;
+   hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
+   hv_vcpu->exit.u.synic.msg_page = synic->msg_page;
+
+   kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
+}
+
  static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
 u32 msr, u64 data, bool host)
  {
@@ -141,6 +155,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
switch (msr) {
case HV_X64_MSR_SCONTROL:
synic->control = data;
+   synic_exit(synic, msr);


Another note.  I am getting:

EAX= EBX= ECX= EDX=0663
ESI= EDI= EBP= ESP=
EIP=fff0 EFL=0002 [---] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =   9300
CS =f000   9b00
SS =   9300
DS =   9300
FS =   9300
GS =   9300
LDT=   8200
TR =   8b00
GDT=  
IDT=  
CR0=6010 CR2= CR3= CR4=
DR0= DR1= DR2=
DR3=
DR6=0ff0 DR7=0400
EFER=
Code=90 90 90 90 eb c3 90 90 90 90 90 90 00 00 00 00 56 54 46 00 <90> 90
eb ac 90 90 90 90 90 90 90 90 90 90 90 90 00 00 00 00 00 00 00 00 00 00
00 00 00 00

if I run a patched QEMU but I *do not* enable the synthetic interrupt
controller.  I can fix it by wrapping the calls to synic_exit with 

Re: [PATCH 3/3] s390/dma: Allow per device dma ops

2015-11-03 Thread Cornelia Huck
On Tue,  3 Nov 2015 12:54:39 +0100
Christian Borntraeger  wrote:

> As virtio-ccw now has dma ops, we can no longer default to the PCI ones.
> Make use of dev_archdata to keep the dma_ops per device. The pci devices
> now use that to override the default, and the default is changed to use
> the noop ops for everything that is not PCI. To compile without PCI
> support we also have to enable the DMA api with virtio.

Not only with virtio, but generally, right?

> Signed-off-by: Christian Borntraeger 
> Reviewed-by: Joerg Roedel 
> Acked-by: Sebastian Ott 
> ---
>  arch/s390/Kconfig   | 3 ++-
>  arch/s390/include/asm/device.h  | 6 +-
>  arch/s390/include/asm/dma-mapping.h | 6 --
>  arch/s390/pci/pci.c | 1 +
>  arch/s390/pci/pci_dma.c | 4 ++--
>  5 files changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
> index 1d57000..04f0e02 100644
> --- a/arch/s390/Kconfig
> +++ b/arch/s390/Kconfig
> @@ -113,6 +113,7 @@ config S390
>   select GENERIC_FIND_FIRST_BIT
>   select GENERIC_SMP_IDLE_THREAD
>   select GENERIC_TIME_VSYSCALL
> + select HAS_DMA
>   select HAVE_ALIGNED_STRUCT_PAGE if SLUB
>   select HAVE_ARCH_AUDITSYSCALL
>   select HAVE_ARCH_EARLY_PFN_TO_NID
> @@ -124,6 +125,7 @@ config S390
>   select HAVE_CMPXCHG_DOUBLE
>   select HAVE_CMPXCHG_LOCAL
>   select HAVE_DEBUG_KMEMLEAK
> + select HAVE_DMA_ATTRS
>   select HAVE_DYNAMIC_FTRACE
>   select HAVE_DYNAMIC_FTRACE_WITH_REGS
>   select HAVE_FTRACE_MCOUNT_RECORD
> @@ -580,7 +582,6 @@ config QDIO
> 
>  menuconfig PCI
>   bool "PCI support"
> - select HAVE_DMA_ATTRS
>   select PCI_MSI
>   help
> Enable PCI support.

Hm. Further down in this file, there's

config HAS_DMA  
def_bool PCI
select HAVE_DMA_API_DEBUG

Should we maybe select HAVE_DMA_API_DEBUG above, drop the HAS_DMA
config option and rely on not defining NO_DMA instead?

Otherwise, the patch looks good to me.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH v7 06/35] acpi: add aml_method_serialized

2015-11-03 Thread Igor Mammedov
On Mon,  2 Nov 2015 17:13:08 +0800
Xiao Guangrong  wrote:

> It avoids an explicit Mutex and will be used by NVDIMM ACPI
> 
> Signed-off-by: Xiao Guangrong 
> ---
>  hw/acpi/aml-build.c | 26 --
>  include/hw/acpi/aml-build.h |  1 +
>  2 files changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> index 9f792ab..8bee8b2 100644
> --- a/hw/acpi/aml-build.c
> +++ b/hw/acpi/aml-build.c
> @@ -696,14 +696,36 @@ Aml *aml_while(Aml *predicate)
>  }
>  
>  /* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefMethod */
> -Aml *aml_method(const char *name, int arg_count)
> +static Aml *__aml_method(const char *name, int arg_count, bool serialized)
We don't have many users of aml_method() yet, so I'd prefer to have a single
function rather than multiple ones:

I suggest to do something like:
typedef enum {
AML_NONSERIALIZED = 0,
AML_SERIALIZED = 1,
} AmlSerializeRule;

aml_method(const char *name, AmlSerializeRule rule, int synclevel);

with current users fixed up with AML_NONSERIALIZED argument. 

>  {
>  Aml *var = aml_bundle(0x14 /* MethodOp */, AML_PACKAGE);
> +int methodflags;
> +
> +/*
> + * MethodFlags:
> + *   bit 0-2: ArgCount (0-7)
> + *   bit 3: SerializeFlag
> + * 0: NotSerialized
> + * 1: Serialized
> + *   bit 4-7: reserved (must be 0)
> + */
> +assert(!(arg_count & ~7));
> +methodflags = arg_count | (serialized << 3);
>  build_append_namestring(var->buf, "%s", name);
> -build_append_byte(var->buf, arg_count); /* MethodFlags: ArgCount */
> +build_append_byte(var->buf, methodflags);
>  return var;
>  }
>  
> +Aml *aml_method(const char *name, int arg_count)
> +{
> +return __aml_method(name, arg_count, false);
> +}
> +
> +Aml *aml_method_serialized(const char *name, int arg_count)
> +{
> +return __aml_method(name, arg_count, true);
> +}
> +
>  /* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefDevice */
>  Aml *aml_device(const char *name_format, ...)
>  {
> diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
> index 5b8a118..00cf40e 100644
> --- a/include/hw/acpi/aml-build.h
> +++ b/include/hw/acpi/aml-build.h
> @@ -263,6 +263,7 @@ Aml *aml_qword_memory(AmlDecode dec, AmlMinFixed 
> min_fixed,
>  Aml *aml_scope(const char *name_format, ...) GCC_FMT_ATTR(1, 2);
>  Aml *aml_device(const char *name_format, ...) GCC_FMT_ATTR(1, 2);
>  Aml *aml_method(const char *name, int arg_count);
> +Aml *aml_method_serialized(const char *name, int arg_count);
>  Aml *aml_if(Aml *predicate);
>  Aml *aml_else(void);
>  Aml *aml_while(Aml *predicate);

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v1 2/2] dma-mapping-common: add DMA attribute - DMA_ATTR_IOMMU_BYPASS

2015-11-03 Thread Christoph Hellwig
On Tue, Nov 03, 2015 at 10:08:13AM +1100, Benjamin Herrenschmidt wrote:
> On Mon, 2015-11-02 at 22:45 +0100, Arnd Bergmann wrote:
> > > Then I would argue for naming this differently. Make it an optional
> > > hint "DMA_ATTR_HIGH_PERF" or something like that. Whether this is
> > > achieved via using a bypass or other means in the backend not the
> > > business of the driver.
> > > 
> > 
> > With a name like that, who wouldn't pass that flag? ;-)
> 
> xHCI for example, vs. something like 10G ethernet... but yes I agree it
> sucks. I don't like that sort of policy anywhere in drivers. On the
> other hand the platform doesn't have much information to make that sort
> of decision either.

Maybe because it should simply use what's optimal?  E.g. passthrough
whenever possible, where the reasons it may not be possible are: dma_mask, vfio
requirements, kernel command line option.  This is what a lot of
architectures already do, I remember the SGI Origin / Altix code has the
same behavior as well.  Those IOMMUs already had the 64 bit passthrough
and 32-bit sliding window in addition to the real IOMMU 10 years ago.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: x86: allow RSM from 64-bit mode

2015-11-03 Thread Paolo Bonzini
The SDM says that exiting system management mode from 64-bit mode
is invalid, but that would be too good to be true.  But actually,
most of the code is already there to support exiting from compat
mode (EFER.LME=1, EFER.LMA=0).  Getting all the way from 64-bit
mode to real mode only requires clearing CS.L and CR4.PCIDE.

Cc: sta...@vger.kernel.org
Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c
Cc: Laszlo Ersek 
Cc: Radim Krčmář 
Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/emulate.c | 30 +-
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b60fed56671b..1505587d06e9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2484,16 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 
/*
 * Get back to real mode, to prepare a safe state in which to load
-* CR0/CR3/CR4/EFER.
-*
-* CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
+* CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
+* supports long mode.
 */
+   cr4 = ctxt->ops->get_cr(ctxt, 4);
+   if (emulator_has_longmode(ctxt)) {
+   struct desc_struct cs_desc;
+
+   /* Zero CR4.PCIDE before CR0.PG.  */
+   if (cr4 & X86_CR4_PCIDE) {
+   ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+   cr4 &= ~X86_CR4_PCIDE;
+   }
+
+   /* A 32-bit code segment is required to clear EFER.LMA.  */
+   memset(&cs_desc, 0, sizeof(cs_desc));
+   cs_desc.type = 0xb;
+   cs_desc.s = cs_desc.g = cs_desc.p = 1;
+   ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
+   }
+
+   /* For the 64-bit case, this will clear EFER.LMA.  */
cr0 = ctxt->ops->get_cr(ctxt, 0);
if (cr0 & X86_CR0_PE)
ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
-   cr4 = ctxt->ops->get_cr(ctxt, 4);
+
+   /* Now clear CR4.PAE (which must be done before clearing EFER.LME).  */
if (cr4 & X86_CR4_PAE)
ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+
+   /* And finally go back to 32-bit mode.  */
efer = 0;
ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
 
@@ -4454,7 +4474,7 @@ static const struct opcode twobyte_table[256] = {
F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
/* 0xA8 - 0xAF */
I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
-   II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm),
+   II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] KVM: x86: Reset RFLAGS state following processor init/reset

2015-11-03 Thread Wanpeng Li
Reference SDM Volume 1 3.4.3:

Following initialization of the processor (either by asserting the 
RESET pin or the INIT pin), the state of the EFLAGS register is 
00000002H.

However, the eflags fixed bit is not set and other bits are also not 
cleared during the init/reset in kvm.

This patch resets the eflags register to 00000002H following initialization 
of the processor.

Signed-off-by: Wanpeng Li 
---
v1 -> v2:
 * use vmcs_writel

 arch/x86/kvm/vmx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b680c2e..1a95ef7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4935,6 +4935,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool 
init_event)
vmx_set_efer(vcpu, 0);
vmx_fpu_activate(vcpu);
update_exception_bitmap(vcpu);
+   vmcs_writel(GUEST_RFLAGS, X86_EFLAGS_FIXED);
 
vpid_sync_context(vmx->vpid);
 }
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] dma: Provide simple noop dma ops

2015-11-03 Thread Christian Borntraeger
We are going to require dma_ops for several common drivers, even for
systems that do have an identity mapping. Let's provide some minimal
no-op dma_ops that can be used for that purpose.

Signed-off-by: Christian Borntraeger 
---
 include/linux/dma-mapping.h |  2 ++
 lib/Makefile|  1 +
 lib/dma-noop.c  | 75 +
 3 files changed, 78 insertions(+)
 create mode 100644 lib/dma-noop.c

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index ac07ff0..7912f54 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -66,6 +66,8 @@ struct dma_map_ops {
int is_phys;
 };
 
+extern struct dma_map_ops dma_noop_ops;
+
 #define DMA_BIT_MASK(n)(((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
 
 #define DMA_MASK_NONE  0x0ULL
diff --git a/lib/Makefile b/lib/Makefile
index 13a7c6a..92d6135 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,6 +18,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
+lib-$(CONFIG_HAS_DMA) += dma-noop.o
 
 lib-y  += kobject.o klist.o
 obj-y  += lockref.o
diff --git a/lib/dma-noop.c b/lib/dma-noop.c
new file mode 100644
index 0000000..7214564
--- /dev/null
+++ b/lib/dma-noop.c
@@ -0,0 +1,75 @@
+/*
+ * lib/dma-noop.c
+ *
+ * Simple DMA noop-ops that map 1:1 with memory
+ */
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+
+static void *dma_noop_alloc(struct device *dev, size_t size,
+   dma_addr_t *dma_handle, gfp_t gfp,
+   struct dma_attrs *attrs)
+{
+   void *ret;
+
+   ret = (void *)__get_free_pages(gfp, get_order(size));
+   if (ret)
+   *dma_handle = virt_to_phys(ret);
+   return ret;
+}
+
+static void dma_noop_free(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_addr,
+ struct dma_attrs *attrs)
+{
+   free_pages((unsigned long)cpu_addr, get_order(size));
+}
+
+static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+   return page_to_phys(page) + offset;
+}
+
+static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int 
nents,
+enum dma_data_direction dir, struct dma_attrs 
*attrs)
+{
+   int i;
+   struct scatterlist *sg;
+
+   for_each_sg(sgl, sg, nents, i) {
+   void *va;
+
+   BUG_ON(!sg_page(sg));
+   va = sg_virt(sg);
+   sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va);
+   sg_dma_len(sg) = sg->length;
+   }
+
+   return nents;
+}
+
+static int dma_noop_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+   return 0;
+}
+
+static int dma_noop_supported(struct device *dev, u64 mask)
+{
+   return 1;
+}
+
+struct dma_map_ops dma_noop_ops = {
+   .alloc  = dma_noop_alloc,
+   .free   = dma_noop_free,
+   .map_page   = dma_noop_map_page,
+   .map_sg = dma_noop_map_sg,
+   .mapping_error  = dma_noop_mapping_error,
+   .dma_supported  = dma_noop_supported,
+};
+
+EXPORT_SYMBOL(dma_noop_ops);
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] alpha/dma: use common noop dma ops

2015-11-03 Thread Christian Borntraeger
Some of the alpha pci noop dma ops are identical to the common ones.
Use them.

Signed-off-by: Christian Borntraeger 
Reviewed-by: Joerg Roedel 
---
 arch/alpha/kernel/pci-noop.c | 46 
 1 file changed, 4 insertions(+), 42 deletions(-)

diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index 2b1f4a1..8e735b5e 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -123,44 +123,6 @@ static void *alpha_noop_alloc_coherent(struct device *dev, 
size_t size,
return ret;
 }
 
-static void alpha_noop_free_coherent(struct device *dev, size_t size,
-void *cpu_addr, dma_addr_t dma_addr,
-struct dma_attrs *attrs)
-{
-   free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-static dma_addr_t alpha_noop_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- struct dma_attrs *attrs)
-{
-   return page_to_pa(page) + offset;
-}
-
-static int alpha_noop_map_sg(struct device *dev, struct scatterlist *sgl, int 
nents,
-enum dma_data_direction dir, struct dma_attrs 
*attrs)
-{
-   int i;
-   struct scatterlist *sg;
-
-   for_each_sg(sgl, sg, nents, i) {
-   void *va;
-
-   BUG_ON(!sg_page(sg));
-   va = sg_virt(sg);
-   sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va);
-   sg_dma_len(sg) = sg->length;
-   }
-
-   return nents;
-}
-
-static int alpha_noop_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-   return 0;
-}
-
 static int alpha_noop_supported(struct device *dev, u64 mask)
 {
return mask < 0x00ffffffUL ? 0 : 1;
@@ -168,10 +130,10 @@ static int alpha_noop_supported(struct device *dev, u64 
mask)
 
 struct dma_map_ops alpha_noop_ops = {
.alloc  = alpha_noop_alloc_coherent,
-   .free   = alpha_noop_free_coherent,
-   .map_page   = alpha_noop_map_page,
-   .map_sg = alpha_noop_map_sg,
-   .mapping_error  = alpha_noop_mapping_error,
+   .free   = dma_noop_free_coherent,
+   .map_page   = dma_noop_map_page,
+   .map_sg = dma_noop_map_sg,
+   .mapping_error  = dma_noop_mapping_error,
.dma_supported  = alpha_noop_supported,
 };
 
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv3 0/3] dma ops and virtio

2015-11-03 Thread Christian Borntraeger
Andy,
this is the next and hopefully last version. Seems to work fine
when replacing the previous patches in your tree. I have some
reviews/acks for patch 2 and 3. patch 1 still needs one ack.
Can you replace the patches in your tree and carry them along
with your changes?

old introduction:
-
There are some attempts to unify the dma ops (Christoph) as well
as some attempts to make virtio use the dma API (Andy).

At kernel summit we concluded that we want to use the same code on all
platforms, wherever possible, so having a dummy dma_op might be the
easiest solution to keep virtio-ccw as similar as possible to
virtio-pci. Together with a fixed up patch set from Andy Lutomirski
this seems to work.  

We will also need a fixup for powerpc and QEMU changes to make virtio
work with iommu on power and x86.


v2->v3:
- make dma-noop depend on HAS_DMA
- do not do memset on alloc
- prefix patches with *dma*
v1->v2:
- initial testing
- always use dma_noop_ops if device has no private dma_ops
- get rid of setup in virtio_ccw,kvm_virtio
- set CONFIG_HAS_DMA(ATTRS) for virtio (fixes compile for !PCI)
- rename s390_dma_ops to s390_pci_dma_ops


Christian Borntraeger (3):
  dma: Provide simple noop dma ops
  alpha/dma: use common noop dma ops
  s390/dma: Allow per device dma ops

 arch/alpha/kernel/pci-noop.c| 46 ++-
 arch/s390/Kconfig   |  3 +-
 arch/s390/include/asm/device.h  |  6 ++-
 arch/s390/include/asm/dma-mapping.h |  6 ++-
 arch/s390/pci/pci.c |  1 +
 arch/s390/pci/pci_dma.c |  4 +-
 include/linux/dma-mapping.h |  2 +
 lib/Makefile|  1 +
 lib/dma-noop.c  | 75 +
 9 files changed, 96 insertions(+), 48 deletions(-)
 create mode 100644 lib/dma-noop.c

-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] s390/dma: Allow per device dma ops

2015-11-03 Thread Christian Borntraeger
As virtio-ccw now has dma ops, we can no longer default to the PCI ones.
Make use of dev_archdata to keep the dma_ops per device. The pci devices
now use that to override the default, and the default is changed to use
the noop ops for everything that is not PCI. To compile without PCI
support we also have to enable the DMA api with virtio.

Signed-off-by: Christian Borntraeger 
Reviewed-by: Joerg Roedel 
Acked-by: Sebastian Ott 
---
 arch/s390/Kconfig   | 3 ++-
 arch/s390/include/asm/device.h  | 6 +-
 arch/s390/include/asm/dma-mapping.h | 6 --
 arch/s390/pci/pci.c | 1 +
 arch/s390/pci/pci_dma.c | 4 ++--
 5 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 1d57000..04f0e02 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -113,6 +113,7 @@ config S390
select GENERIC_FIND_FIRST_BIT
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
+   select HAS_DMA
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_EARLY_PFN_TO_NID
@@ -124,6 +125,7 @@ config S390
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
select HAVE_DEBUG_KMEMLEAK
+   select HAVE_DMA_ATTRS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_FTRACE_MCOUNT_RECORD
@@ -580,7 +582,6 @@ config QDIO
 
 menuconfig PCI
bool "PCI support"
-   select HAVE_DMA_ATTRS
select PCI_MSI
help
  Enable PCI support.
diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h
index d8f9872..4a9f35e 100644
--- a/arch/s390/include/asm/device.h
+++ b/arch/s390/include/asm/device.h
@@ -3,5 +3,9 @@
  *
  * This file is released under the GPLv2
  */
-#include 
+struct dev_archdata {
+   struct dma_map_ops *dma_ops;
+};
 
+struct pdev_archdata {
+};
diff --git a/arch/s390/include/asm/dma-mapping.h 
b/arch/s390/include/asm/dma-mapping.h
index b3fd54d..cb05f5c 100644
--- a/arch/s390/include/asm/dma-mapping.h
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -11,11 +11,13 @@
 
 #define DMA_ERROR_CODE (~(dma_addr_t) 0x0)
 
-extern struct dma_map_ops s390_dma_ops;
+extern struct dma_map_ops s390_pci_dma_ops;
 
 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 {
-   return &s390_dma_ops;
+   if (dev && dev->archdata.dma_ops)
+   return dev->archdata.dma_ops;
+   return &dma_noop_ops;
 }
 
 static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 7ef12a3..fa41605 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -649,6 +649,7 @@ int pcibios_add_device(struct pci_dev *pdev)
 
zdev->pdev = pdev;
pdev->dev.groups = zpci_attr_groups;
+   pdev->dev.archdata.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev);
 
for (i = 0; i < PCI_BAR_COUNT; i++) {
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 37505b8..ea39c3f 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -495,7 +495,7 @@ static int __init dma_debug_do_init(void)
 }
 fs_initcall(dma_debug_do_init);
 
-struct dma_map_ops s390_dma_ops = {
+struct dma_map_ops s390_pci_dma_ops = {
.alloc  = s390_dma_alloc,
.free   = s390_dma_free,
.map_sg = s390_dma_map_sg,
@@ -506,7 +506,7 @@ struct dma_map_ops s390_dma_ops = {
.is_phys= 0,
/* dma_supported is unconditionally true without a callback */
 };
-EXPORT_SYMBOL_GPL(s390_dma_ops);
+EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
 
 static int __init s390_iommu_setup(char *str)
 {
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvm-unit-tests PATCH 00/14] ppc64: initial drop

2015-11-03 Thread Paolo Bonzini


On 03/11/2015 08:08, Thomas Huth wrote:
> On 03/08/15 16:41, Andrew Jones wrote:
>> > This series is the first series of a series of series that will
>> > bring support to kvm-unit-tests for ppc64, and eventually ppc64le.
>  Hi Andrew,
> 
> may I ask about the current state of ppc64 support in the
> kvm-unit-tests? Is there a newer version available than the one you
> posted three months ago?

I've been a slob with all the kvm-unit-tests patches.  Andrew, can you
send a single submission of all the patches, so that I can review them
and apply them?

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/3] KVM: x86: simplify RSM into 64-bit protected mode

2015-11-03 Thread Laszlo Ersek
On 11/02/15 10:32, Paolo Bonzini wrote:
> 
> 
> On 31/10/2015 20:50, Laszlo Ersek wrote:
>> Tested-by: Laszlo Ersek 
> 
> Thanks Laszlo, I applied patches 1 and 2 (since your "part 2" never was :)).
> 
> Paolo
> 

Thanks.

Since you can rebase the queue freely, can you please also add:

Reported-by: Laszlo Ersek 

to Radim's patch "KVM: x86: handle SMBASE as physical address in RSM"?

Thanks
Laszlo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v4 0/3] KVM: arm/arm64: Clean up some obsolete code

2015-11-03 Thread Pavel Fedin
 Hello!

>  By this time i'll make a very minimal version of patch 0001, for you to test 
> it. If we have
> problems with current 0001, which we
> cannot solve quickly, we could stick to that version then, which will provide 
> the necessary
> changes to plug in LPIs, yet with
> minimal changes (it will only remove vgic_irq_lr_map).
>  I guess i should have done it before. Or, i could even respin v5, with 
> current 0001 split up.
> This should make it easier to bisect
> the problem.

 So, i have just sent v5, conditions are the same as before. It is OK to stop 
at any point, and actually you should be able to
easily throw away 0003 and apply just 1, 2, 4. The minimum needed thing for 
LPIs introduction is 0001.
 You can also stick to v4 if the problem does not get triggered by its first 
patch, if you prefer reduced commit log.

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] KVM: x86: Reset RFLAGS state following processor init/reset

2015-11-03 Thread Paolo Bonzini


On 03/11/2015 12:40, Wanpeng Li wrote:
> Reference SDM Volume 1 3.4.3:
> 
> Following initialization of the processor (either by asserting the 
> RESET pin or the INIT pin), the state of the EFLAGS register is 
> 00000002H.
> 
> However, the eflags fixed bit is not set and other bits are also not 
> cleared during the init/reset in kvm.
> 
> This patch reset eflags register to 00000002H following initialization 
> of the processor.
> 
> Signed-off-by: Wanpeng Li 
> ---
> v1 -> v2:
>  * use vmcs_writel
> 
>  arch/x86/kvm/vmx.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index b680c2e..1a95ef7 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4935,6 +4935,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool 
> init_event)
>   vmx_set_efer(vcpu, 0);
>   vmx_fpu_activate(vcpu);
>   update_exception_bitmap(vcpu);
> + vmcs_writel(GUEST_RFLAGS, X86_EFLAGS_FIXED);
>  
>   vpid_sync_context(vmx->vpid);
>  }
> 

No, this is doing exactly the same thing that is already done elsewhere
in vmx_vcpu_reset (which Nadav pointed out to you).  So it's not just a
pointless addition with no effect at all; it's wrong, because it
introduces duplication.

Please answer this question: is there a bug or not?

If yes, then using kvm_set_rflags as in v1 is the right thing.  However,
you have to remove the _existing_ vmcs_writel call in vmx_vcpu_reset.
Also, if there is a bug you have to explain it in the commit message and
provide a testcase.  By the way, I am still waiting for the VPID test cases.

If no, then this is a cleanup, we can still do the change but you have
to explain this in the commit message.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH v3 9/9] kvm/x86: Hyper-V kvm exit

2015-11-03 Thread Roman Kagan
On Tue, Nov 03, 2015 at 03:51:16PM +0100, Paolo Bonzini wrote:
> 
> 
> On 03/11/2015 15:36, Andrey Smetanin wrote:
> >>
> >>
> >> if I run a patched QEMU but I *do not* enable the synthetic interrupt
> >> controller.  I can fix it by wrapping the calls to synic_exit with "if
> >> (!host)", but I haven't checked yet the source---so that may not be the
> >> proper fix.  Sorry for not having looked more in detail.
> >>
> > Could you please specify test case(kvm unit tests ?) and kernel/qemu(if
> > it's not standard)?
> 
> It happens just by starting QEMU.
> 
> Kernel: kvm/queue
> + kvm/irqchip: kvm_arch_irq_routing_update renaming split
> + kvm/x86: split ioapic-handled and EOI exit bitmaps
> + kvm/x86: Hyper-V synthetic interrupt controller
> + kvm/x86: Hyper-V kvm exit
> 
> QEMU: 3a958f559ecd
> + standard-headers/x86: add Hyper-V SynIC constants
> + target-i386/kvm: Hyper-V SynIC MSR's support
> + linux-headers/kvm: add Hyper-V SynIC irq routing type and struct
> + kvm: Hyper-V SynIC irq routing support
> + linux-headers/kvm: KVM_EXIT_HYPERV type and struct
> + target-i386/hyperv: Hyper-V SynIC SINT routing and vCPU exit
> + hw/misc: Hyper-V test device 'hyperv-testdev'
> 
> Can be reproduced just with
> "../qemu/+build/x86_64-softmmu/qemu-system-x86_64 --enable-kvm -cpu
> kvm64 -display none".

Thanks!  We've figured it out:

qemu initializes the MSRs if has_msr_hv_synic is set, which depends only
on whether the kernel supports the MSRs and ignores the cpu property.

OTOH setting those MSRs (on the host side) triggers a vcpu exit which
checks the cpu property and aborts if it's unset.  Voila.

This way we also discovered that no error was triggered when the cpu
property was set but the kernel didn't support it (and this problem was
also present in other hyperv-related features).

The solution appears to be to bail out when a hyperv property is
requested but the host doesn't support it, and then check for the
property only when deciding if the relevant actions need to be taken.

Protecting vcpu exits with !host in the kernel seems to make sense, too.

We're in the process of preparing the updated patches.

Thanks,
Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 9/9] kvm/x86: Hyper-V kvm exit

2015-11-03 Thread Andrey Smetanin



On 11/03/2015 05:51 PM, Paolo Bonzini wrote:



On 03/11/2015 15:36, Andrey Smetanin wrote:



if I run a patched QEMU but I *do not* enable the synthetic interrupt
controller.  I can fix it by wrapping the calls to synic_exit with "if
(!host)", but I haven't checked yet the source---so that may not be the
proper fix.  Sorry for not having looked more in detail.


Could you please specify test case(kvm unit tests ?) and kernel/qemu(if
it's not standard)?


It happens just by starting QEMU.

Kernel: kvm/queue
+ kvm/irqchip: kvm_arch_irq_routing_update renaming split
+ kvm/x86: split ioapic-handled and EOI exit bitmaps
+ kvm/x86: Hyper-V synthetic interrupt controller
+ kvm/x86: Hyper-V kvm exit

QEMU: 3a958f559ecd
+ standard-headers/x86: add Hyper-V SynIC constants
+ target-i386/kvm: Hyper-V SynIC MSR's support
+ linux-headers/kvm: add Hyper-V SynIC irq routing type and struct
+ kvm: Hyper-V SynIC irq routing support
+ linux-headers/kvm: KVM_EXIT_HYPERV type and struct
+ target-i386/hyperv: Hyper-V SynIC SINT routing and vCPU exit
+ hw/misc: Hyper-V test device 'hyperv-testdev'

Can be reproduced just with
"../qemu/+build/x86_64-softmmu/qemu-system-x86_64 --enable-kvm -cpu
kvm64 -display none".


Thanks!
We probably found the root cause -
qemu reads/writes Hyper-V SynIC msrs just by checking SynIC MSR's support 
in kernel. So KVM synic exits into userspace(at SynIC MSR's writes), 
while userspace Hyper-V SynIC handler doesn't expect this exit(cpu 
'hv-synic' option is not set), so handler returns -1 and qemu exits.

Paolo


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM/arm: kernel low level debug support for ARM32 virtual platforms

2015-11-03 Thread Mario Smarduch


On 11/3/2015 8:33 AM, Christopher Covington wrote:
> Hi Mario,
> 
> On 11/02/2015 06:51 PM, Mario Smarduch wrote:
>> Hello,
>>this is a re-post from couple weeks ago, please take time to review this 
>> simple patch which simplifies DEBUG_LL and prevents kernel crash on virtual 
>> platforms.
>>
>> Before this patch DEBUG_LL for 'dummy virtual machine':
>>
>> ( ) Kernel low-level debugging via EmbeddedICE DCC channel
>> ( ) Kernel low-level debug output via semihosting I/O
>> ( ) Kernel low-level debugging via 8250 UART
>> ( ) Kernel low-level debugging via ARM Ltd PL01x Primecell
>>
>> In summary if debug uart is not emulated kernel crashes.
>> And once you pass that hurdle, uart physical/virtual addresses are unknown.
>> DEBUG_LL comes in handy on many occasions and should be somewhat 
>> intuitive to use like it is for physical platforms. For virtual platforms
>> user may start doubting the host and get into a bigger mess.
>>
>> After this patch is applied user gets:
>>
>> (X) Kernel low-level debugging on QEMU Virtual Platform
>> ( ) Kernel low-level debugging on Kvmtool Virtual Platform
>>  . above repeated 
>>
>> The virtual addresses selected follow arm reference models, high in vmalloc 
>> section with high mem enabled and guest running with >= 1GB of memory. The 
>> offset is leftover from arm reference models.
> 
> Which model? It doesn't appear to match the vexpress AEM/RTSM/FVP/whatever
> which used 0x1c090000 for UART0.

I recall QEMU virt model had its own physical address map, for sure I saw the
virtio-mmio regions assigned in some ARM document. Peter would you know?

As far as kvmtool I'm not sure, currently PC1 COM1 port is used? Andre will that
stay fixed?

> 
>> The patch is against 4.2.0-rc2 commit 43297dda0a51
>>
>> Original Description
>> 
>> When booting a VM using QEMU or Kvmtool there are no clear ways to 
>> enable low level debugging for these virtual platforms. some menu port 
>> choices are not supported by the virtual platforms at all. And there is no
>> help on the location of physical and virtual addresses for the ports.
>> This may lead to wrong debug port and a frozen VM with a blank screen.
>>
>> This patch adds menu selections for QEMU and Kvmtool virtual platforms for 
>> low 
>> level kernel print debugging. Help section displays port physical and
>> virtual addresses.
>>
>> ARM reference models use the MIDR register to run-time select UART port 
>> address 
>> (for ARCH_VEXPRESS) based on A9 or A15 part numbers. Looked for a same 
>> approach
>> but couldn't find a way to differentiate between virtual platforms, something
>> like a platform register.
>>
>> Acked-by: Christoffer Dall 
>> Signed-off-by: Mario Smarduch 
>> ---
>>  arch/arm/Kconfig.debug | 22 ++
>>  1 file changed, 22 insertions(+)
>>
>> diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
>> index a2e16f9..d126bd4 100644
>> --- a/arch/arm/Kconfig.debug
>> +++ b/arch/arm/Kconfig.debug
>> @@ -1155,6 +1155,28 @@ choice
>>This option selects UART0 on VIA/Wondermedia System-on-a-chip
>>devices, including VT8500, WM8505, WM8650 and WM8850.
>>  
>> +config DEBUG_VIRT_UART_QEMU
>> +bool "Kernel low-level debugging on QEMU Virtual Platform"
>> +depends on ARCH_VIRT
>> +select DEBUG_UART_PL01X
>> +help
>> +  Say Y here if you want the debug print routines to direct
>> +  their output to PL011 UART port on QEMU Virtual Platform.
>> +  Appropriate address values are:
>> +PHYS        VIRT
>> +0x9000000   0xf8090000
> 
> I thought the only guarantee the virt machine had about the memory map was
> that it would be described in the device tree.
> 
>> +config DEBUG_VIRT_UART_KVMTOOL
>> +bool "Kernel low-level debugging on Kvmtool Virtual Platform"
>> +depends on ARCH_VIRT
>> +select DEBUG_UART_8250
>> +help
>> +  Say Y here if you want the debug print routines to direct
>> +  their output to 8250 UART port on Kvmtool Virtual
>> +  Platform. Appropriate address values are:
>> +PHYS    VIRT
>> +0x3f8   0xf80903f8
>> +
>>  config DEBUG_ICEDCC
>>  bool "Kernel low-level debugging via EmbeddedICE DCC channel"
>>  help
>>
> 
> Regards,
> Christopher Covington
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: allow RSM from 64-bit mode

2015-11-03 Thread Laszlo Ersek
On 11/03/15 15:04, Paolo Bonzini wrote:
> 
> 
> On 03/11/2015 15:02, Laszlo Ersek wrote:
>> On 11/03/15 14:46, Paolo Bonzini wrote:
>>>
>>>
>>> On 03/11/2015 14:40, Laszlo Ersek wrote:
 On 11/03/15 14:29, Paolo Bonzini wrote:
> The SDM says that exiting system management mode from 64-bit mode
> is invalid, but that would be too good to be true.  But actually,
> most of the code is already there to support exiting from compat
> mode (EFER.LME=1, EFER.LMA=0).  Getting all the way from 64-bit
> mode to real mode only requires clearing CS.L and CR4.PCIDE.
>
> Cc: sta...@vger.kernel.org
> Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c
> Cc: Laszlo Ersek 
> Cc: Radim Krčmář 
> Signed-off-by: Paolo Bonzini 
> ---
>  arch/x86/kvm/emulate.c | 30 +-
>  1 file changed, 25 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index b60fed56671b..1505587d06e9 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -2484,16 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
>  
>   /*
>* Get back to real mode, to prepare a safe state in which to load
> -  * CR0/CR3/CR4/EFER.
> -  *
> -  * CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
> +  * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
> +  * supports long mode.
>*/
> + cr4 = ctxt->ops->get_cr(ctxt, 4);
> + if (emulator_has_longmode(ctxt)) {
> + struct desc_struct cs_desc;
> +
> + /* Zero CR4.PCIDE before CR0.PG.  */
> + if (cr4 & X86_CR4_PCIDE) {
> + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
> + cr4 &= ~X86_CR4_PCIDE;
> + }
> +
> + /* A 32-bit code segment is required to clear EFER.LMA.  */
> + memset(&cs_desc, 0, sizeof(cs_desc));
> + cs_desc.type = 0xb;
> + cs_desc.s = cs_desc.g = cs_desc.p = 1;
> + ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
> + }
> +
> + /* For the 64-bit case, this will clear EFER.LMA.  */
>   cr0 = ctxt->ops->get_cr(ctxt, 0);
>   if (cr0 & X86_CR0_PE)
>   ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
> - cr4 = ctxt->ops->get_cr(ctxt, 4);
> +
> + /* Now clear CR4.PAE (which must be done before clearing EFER.LME).  */
>   if (cr4 & X86_CR4_PAE)
>   ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
> +
> + /* And finally go back to 32-bit mode.  */
>   efer = 0;
>   ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
>  
> @@ -4454,7 +4474,7 @@ static const struct opcode twobyte_table[256] = {
>   F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
>   /* 0xA8 - 0xAF */
>   I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
> - II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm),
> + II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
>   F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
>   F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
>   F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
>

 What branch should I test this on top of?
>>>
>>> Just use whatever you were using before, and revert commit c9db607
>>> ("UefiCpuPkg: PiSmmCpuDxeSmm: do not execute RSM from 64-bit mode",
>>> 2015-10-14) from your OVMF branch.
>>
>> Right, I planned to do that OVMF-side revert; I just wasn't sure if e.g.
>> kvm/queue had some prerequisite patches for this.
> 
> Indeed, you can use either your "part 2" series or Radim's patches from
> kvm/queue, it's the same.

I noticed that you applied this patch to kvm/queue, at
9f64d2c75fa6a5aac0a5657400f3473f8144c3be. So I simply tested kvm/queue
in that state. (I did not forget to drop the workaround OVMF patch first.)

Tested-by: Laszlo Ersek 

Thank you!
Laszlo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 7/8] vfio: platform: add dev_info on device reset

2015-11-03 Thread Eric Auger
It might be helpful for the end-user to check the device reset
function was found by the vfio platform reset framework.

Let's store a pointer to the struct device in vfio_platform_device
and trace when the reset function is called or not found.

Signed-off-by: Eric Auger 

---

v3: creation
---
 drivers/vfio/platform/vfio_platform_common.c  | 14 --
 drivers/vfio/platform/vfio_platform_private.h |  1 +
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index f74836a..376d289 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -144,8 +144,12 @@ static void vfio_platform_release(void *device_data)
mutex_lock(&driver_lock);
 
if (!(--vdev->refcnt)) {
-   if (vdev->reset)
+   if (vdev->reset) {
+   dev_info(vdev->device, "reset\n");
vdev->reset(vdev);
+   } else {
+   dev_warn(vdev->device, "no reset function found!\n");
+   }
vfio_platform_regions_cleanup(vdev);
vfio_platform_irq_cleanup(vdev);
}
@@ -174,8 +178,12 @@ static int vfio_platform_open(void *device_data)
if (ret)
goto err_irq;
 
-   if (vdev->reset)
+   if (vdev->reset) {
+   dev_info(vdev->device, "reset\n");
vdev->reset(vdev);
+   } else {
+   dev_warn(vdev->device, "no reset function found!\n");
+   }
}
 
vdev->refcnt++;
@@ -551,6 +559,8 @@ int vfio_platform_probe_common(struct vfio_platform_device 
*vdev,
return -EINVAL;
}
 
+   vdev->device = dev;
+
group = iommu_group_get(dev);
if (!group) {
pr_err("VFIO: No IOMMU group for device %s\n", vdev->name);
diff --git a/drivers/vfio/platform/vfio_platform_private.h 
b/drivers/vfio/platform/vfio_platform_private.h
index d1b0668..42816dd 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -59,6 +59,7 @@ struct vfio_platform_device {
struct module   *parent_module;
const char  *compat;
struct module   *reset_module;
+   struct device   *device;
 
/*
 * These fields should be filled by the bus specific binder
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 3/8] vfio: platform: introduce module_vfio_reset_handler macro

2015-11-03 Thread Eric Auger
The module_vfio_reset_handler macro
- define a module alias
- implement module init/exit function which respectively registers
  and unregisters the reset function.

Signed-off-by: Eric Auger 
Reviewed-by: Arnd Bergmann 

---
v4 -> v5:
- add Arnd's R-b

v3 -> v4:
- pass reset to vfio_platform_unregister_reset

v2 -> v3:
- use vfio_platform_register_reset macro

v1 -> v2:
- remove vfio_platform_reset_private.h and move back the macro to
  vfio_platform_private.h header: removed reset_module_register &
  unregister (symbol_get)
- defines the module_vfio_reset_handler macro as suggested by Arnd
  (formerly in vfio_platform_reset_private.h)
---
 drivers/vfio/platform/vfio_platform_private.h | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/vfio/platform/vfio_platform_private.h 
b/drivers/vfio/platform/vfio_platform_private.h
index c563940..fd262be 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -110,4 +110,18 @@ static struct vfio_platform_reset_node __reset ## _node = 
{\
 }; \
 __vfio_platform_register_reset(&__reset ## _node)
 
+#define module_vfio_reset_handler(compat, reset)   \
+MODULE_ALIAS("vfio-reset:" compat);\
+static int __init reset ## _module_init(void)  \
+{  \
+   vfio_platform_register_reset(compat, reset);\
+   return 0;   \
+}; \
+static void __exit reset ## _module_exit(void) \
+{  \
+   vfio_platform_unregister_reset(compat, reset);  \
+}; \
+module_init(reset ## _module_init);\
+module_exit(reset ## _module_exit)
+
 #endif /* VFIO_PLATFORM_PRIVATE_H */
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 6/8] vfio: platform: use list of registered reset function

2015-11-03 Thread Eric Auger
Remove the static lookup table and use the dynamic list of registered
reset functions instead. Also load the reset module through its alias.
The reset struct module pointer is stored in vfio_platform_device.

We also remove the useless struct device pointer parameter in
vfio_platform_get_reset.

This patch fixes the issue related to the usage of __symbol_get, which
besides being moot, prevented compilation with CONFIG_MODULES
disabled.

Also usage of MODULE_ALIAS makes it possible to add a new reset module
without needing to update the framework. This was suggested by Arnd.

Signed-off-by: Eric Auger 
Reported-by: Arnd Bergmann 
Reviewed-by: Arnd Bergmann 

---

v3 -> v4:
- add Arnd R-b.
- Remove the EXPORT_SYMBOL_GPL(vfio_platform_calxedaxgmac_reset) here

v2 -> v3:
- remove clear of vfio_platform_device reset_module and reset
  in vfio_platform_put_reset
- single unlock in vfio_platform_lookup_reset
- use driver_lock instead of reset_lock

v1 -> v2:
- use reset_lock in vfio_platform_lookup_reset
- remove vfio_platform_reset_combo declaration
- remove struct device *dev parameter in vfio_platform_get_reset
- set reset_module and reset to NULL in put function
---
 .../platform/reset/vfio_platform_calxedaxgmac.c|  1 -
 drivers/vfio/platform/vfio_platform_common.c   | 52 --
 drivers/vfio/platform/vfio_platform_private.h  |  7 +--
 3 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c 
b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c
index 80718f2..640f5d8 100644
--- a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c
+++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c
@@ -76,7 +76,6 @@ int vfio_platform_calxedaxgmac_reset(struct 
vfio_platform_device *vdev)
 
return 0;
 }
-EXPORT_SYMBOL_GPL(vfio_platform_calxedaxgmac_reset);
 
 module_vfio_reset_handler("calxeda,hb-xgmac", 
vfio_platform_calxedaxgmac_reset);
 
diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index f2d41a0..f74836a 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -30,37 +30,43 @@
 static LIST_HEAD(reset_list);
 static DEFINE_MUTEX(driver_lock);
 
-static const struct vfio_platform_reset_combo reset_lookup_table[] = {
-   {
-   .compat = "calxeda,hb-xgmac",
-   .reset_function_name = "vfio_platform_calxedaxgmac_reset",
-   .module_name = "vfio-platform-calxedaxgmac",
-   },
-};
-
-static void vfio_platform_get_reset(struct vfio_platform_device *vdev,
-   struct device *dev)
+static vfio_platform_reset_fn_t vfio_platform_lookup_reset(const char *compat,
+   struct module **module)
 {
-   int (*reset)(struct vfio_platform_device *);
-   int i;
+   struct vfio_platform_reset_node *iter;
+   vfio_platform_reset_fn_t reset_fn = NULL;
 
-   for (i = 0 ; i < ARRAY_SIZE(reset_lookup_table); i++) {
-   if (!strcmp(reset_lookup_table[i].compat, vdev->compat)) {
-   request_module(reset_lookup_table[i].module_name);
-   reset = __symbol_get(
-   reset_lookup_table[i].reset_function_name);
-   if (reset) {
-   vdev->reset = reset;
-   return;
-   }
+   mutex_lock(&driver_lock);
+   list_for_each_entry(iter, &reset_list, link) {
+   if (!strcmp(iter->compat, compat) &&
+   try_module_get(iter->owner)) {
+   *module = iter->owner;
+   reset_fn = iter->reset;
+   break;
}
}
+   mutex_unlock(&driver_lock);
+   return reset_fn;
+}
+
+static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
+{
+   char modname[256];
+
+   vdev->reset = vfio_platform_lookup_reset(vdev->compat,
+   &vdev->reset_module);
+   if (!vdev->reset) {
+   snprintf(modname, 256, "vfio-reset:%s", vdev->compat);
+   request_module(modname);
+   vdev->reset = vfio_platform_lookup_reset(vdev->compat,
+&vdev->reset_module);
+   }
 }
 
 static void vfio_platform_put_reset(struct vfio_platform_device *vdev)
 {
if (vdev->reset)
-   symbol_put_addr(vdev->reset);
+   module_put(vdev->reset_module);
 }
 
 static int vfio_platform_regions_init(struct vfio_platform_device *vdev)
@@ -557,7 +563,7 @@ int vfio_platform_probe_common(struct vfio_platform_device 
*vdev,
return ret;
}
 
-   vfio_platform_get_reset(vdev, dev);
+   vfio_platform_get_reset(vdev);
 

Re: [PATCH] KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0()

2015-11-03 Thread Laszlo Ersek
On 11/03/15 19:34, Laszlo Ersek wrote:
> Commit b18d5431acc7 ("KVM: x86: fix CR0.CD virtualization") was
> technically correct, but it broke OVMF guests by slowing down various
> parts of the firmware.
> 
> Commit fb279950ba02 ("KVM: vmx: obey KVM_QUIRK_CD_NW_CLEARED") quirked the
> first function modified by b18d5431acc7, vmx_get_mt_mask(), for OVMF's
> sake. This restored the speed of the OVMF code that runs before
> PlatformPei (including the memory intensive LZMA decompression in SEC).
> 
> This patch extends the quirk to the second function modified by
> b18d5431acc7, kvm_set_cr0(). It eliminates the intrusive slowdown that
> hits the EFI_MP_SERVICES_PROTOCOL implementation of edk2's
> UefiCpuPkg/CpuDxe -- which is built into OVMF --, when CpuDxe starts up
> all APs at once for initialization, in order to count them.
> 
> We also carry over the kvm_arch_has_noncoherent_dma() sub-condition from
> the other half of the original commit b18d5431acc7.
> 
> Cc: Paolo Bonzini 
> Cc: Jordan Justen 
> Cc: Janusz Mocek 
> Cc: Alex Williamson 
> Cc: Xiao Guangrong 
> Signed-off-by: Laszlo Ersek 
> ---
>  arch/x86/kvm/x86.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a24bae0..30723a4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -625,7 +625,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>   if ((cr0 ^ old_cr0) & update_bits)
>   kvm_mmu_reset_context(vcpu);
>  
> - if ((cr0 ^ old_cr0) & X86_CR0_CD)
> + if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
> + kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
> + !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
>   kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
>  
>   return 0;
> 


I had notes on this patch, but I forgot to format it with --notes. They
were:

- People on the CC list, please reply with your Tested-by, Reported-by,
  etc tags as appropriate; it's getting blurry who participated in what
  and how.

- This patch is *not* necessary for the OVMF SMM work; instead it
  addresses an independent OVMF boot regression seen by users.

Thanks
Laszlo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH/RFC 0/4] dma ops and virtio

2015-11-03 Thread Andy Lutomirski
On Tue, Nov 3, 2015 at 12:14 AM, Christian Borntraeger
 wrote:
> Am 02.11.2015 um 21:23 schrieb Andy Lutomirski:
>> On Mon, Nov 2, 2015 at 3:16 AM, Cornelia Huck  
>> wrote:
>>> On Fri, 30 Oct 2015 13:33:07 -0700
>>> Andy Lutomirski  wrote:
>>>
 On Fri, Oct 30, 2015 at 1:25 AM, Cornelia Huck  
 wrote:
> On Thu, 29 Oct 2015 15:50:38 -0700
> Andy Lutomirski  wrote:
>
>> Progress!  After getting that sort-of-working, I figured out what was
>> wrong with my earlier command, and I got that working, too.  Now I
>> get:
>>
>> qemu-system-s390x -fsdev
>> local,id=virtfs1,path=/,security_model=none,readonly -device
>> virtio-9p-ccw,fsdev=virtfs1,mount_tag=/dev/root -M s390-ccw-virtio
>> -nodefaults -device sclpconsole,chardev=console -parallel none -net
>> none -echr 1 -serial none -chardev stdio,id=console,signal=off,mux=on
>> -serial chardev:console -mon chardev=console -vga none -display none
>> -kernel arch/s390/boot/bzImage -append
>> 'init=/home/luto/devel/virtme/virtme/guest/virtme-init
>> psmouse.proto=exps "virtme_stty_con=rows 24 cols 150 iutf8"
>> TERM=xterm-256color rootfstype=9p
>> rootflags=ro,version=9p2000.L,trans=virtio,access=any
>> raid=noautodetect debug'
>
> The commandline looks sane AFAICS.
>
> (...)
>
>> vrfy: device 0.0.: rc=0 pgroup=0 mpath=0 vpm=80
>> virtio_ccw 0.0.: Failed to set online: -5
>>
>> ^^^ bad news!
>
> I'd like to see where in the onlining process this fails. Could you set
> up qemu tracing for css_* and virtio_ccw_* (instructions in
> qemu/docs/tracing.txt)?

 I have a file called events that contains:

 css_*
 virtio_ccw_*

 pointing -trace events= at it results in a trace- file that's 549
 bytes long and contains nothing.  Are wildcards not as well-supported
 as the docs suggest?
>>>
>>> Just tried it, seemed to work for me as expected. And as your messages
>>> indicate, at least some of the css tracepoints are guaranteed to be
>>> hit. Odd.
>>>
>>> Can you try the following sophisticated printf debug patch?
>>>
>>> diff --git a/hw/s390x/css.c b/hw/s390x/css.c
>>> index c033612..6a87bd6 100644
>>> --- a/hw/s390x/css.c
>>> +++ b/hw/s390x/css.c
>>> @@ -308,6 +308,8 @@ static int css_interpret_ccw(SubchDev *sch, hwaddr 
>>> ccw_addr)
>>>  sch->ccw_no_data_cnt++;
>>>  }
>>>
>>> +fprintf(stderr, "CH DBG: %s: cmd_code=%x\n", __func__, ccw.cmd_code);
>>> +
>>>  /* Look at the command. */
>>>  switch (ccw.cmd_code) {
>>>  case CCW_CMD_NOOP:
>>> @@ -375,6 +377,7 @@ static int css_interpret_ccw(SubchDev *sch, hwaddr 
>>> ccw_addr)
>>>  }
>>>  break;
>>>  }
>>> +fprintf(stderr, "CH DBG: %s: ret=%d\n", __func__, ret);
>>>  sch->last_cmd = ccw;
>>>  sch->last_cmd_valid = true;
>>>  if (ret == 0) {
>>>
>>>
> Which qemu version is this, btw.?
>

 git from yesterday.
>>>
>>> Hm. Might be worth trying the s390-ccw-virtio-2.4 machine instead.
>>>
>>
>> No change.
>>
>> With s390-ccw-virtio-2.4, I get:
>>
>> Initializing cgroup subsys cpuset
>> Initializing cgroup subsys cpu
>> Initializing cgroup subsys cpuacct
>> Linux version 4.3.0-rc7-8-gff230d6ec6b2
>> (l...@amaluto.corp.amacapital.net) (gcc version 5.1.1 20150618 (Red
>> Hat Cross 5.1.1-3) (GCC) ) #344 SMP Fri Oct 30 13:16:13 PDT 2015
>> setup: Linux is running under KVM in 64-bit mode
>> setup: Max memory size: 128MB
>> Zone ranges:
>>   DMA  [mem 0x-0x7fff]
>>   Normal   empty
>> Movable zone start for each node
>> Early memory node ranges
>>   node   0: [mem 0x-0x07ff]
>> Initmem setup node 0 [mem 0x-0x07ff]
>> On node 0 totalpages: 32768
>>   DMA zone: 512 pages used for memmap
>>   DMA zone: 0 pages reserved
>>   DMA zone: 32768 pages, LIFO batch:7
>> PERCPU: Embedded 466 pages/cpu @07605000 s1868032 r8192 d32512 
>> u1908736
>> pcpu-alloc: s1868032 r8192 d32512 u1908736 alloc=466*4096
>> pcpu-alloc: [0] 0 [0] 1
>> Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 32256
>> Kernel command line:
>> init=/home/luto/devel/virtme/virtme/guest/virtme-init
>> psmouse.proto=exps "virtme_stty_con=rows 45 cols 150 iutf8"
>> TERM=xterm-256color rootfstype=9p
>> rootflags=version=9p2000.L,trans=virtio,access=any raid=noautodetect
>> ro debug
>> PID hash table entries: 512 (order: 0, 4096 bytes)
>> Dentry cache hash table entries: 16384 (order: 5, 131072 bytes)
>> Inode-cache hash table entries: 8192 (order: 4, 65536 bytes)
>> Memory: 92520K/131072K available (8255K kernel code, 802K rwdata,
>
>
> can you send your kernel config?
>

Attached.

A failing command looks like:

qemu-system-s390x -fsdev
local,id=virtfs1,path=/,security_model=none,readonly 

Re: [PATCH] KVM/arm: kernel low level debug support for ARM32 virtual platforms

2015-11-03 Thread Will Deacon
On Tue, Nov 03, 2015 at 09:44:52AM -0800, Mario Smarduch wrote:
> On 11/3/2015 8:33 AM, Christopher Covington wrote:
> > On 11/02/2015 06:51 PM, Mario Smarduch wrote:
> >>this is a re-post from couple weeks ago, please take time to review 
> >> this 
> >> simple patch which simplifies DEBUG_LL and prevents kernel crash on 
> >> virtual 
> >> platforms.
> >>
> >> Before this patch DEBUG_LL for 'dummy virtual machine':
> >>
> >> ( ) Kernel low-level debugging via EmbeddedICE DCC channel
> >> ( ) Kernel low-level debug output via semihosting I/O
> >> ( ) Kernel low-level debugging via 8250 UART
> >> ( ) Kernel low-level debugging via ARM Ltd PL01x Primecell
> >>
> >> In summary if debug uart is not emulated kernel crashes.
> >> And once you pass that hurdle, uart physical/virtual addresses are unknown.
> >> DEBUG_LL comes in handy on many occasions and should be somewhat 
> >> intuitive to use like it is for physical platforms. For virtual platforms
> >> user may start doubting the host and get into a bigger mess.
> >>
> >> After this patch is applied user gets:
> >>
> >> (X) Kernel low-level debugging on QEMU Virtual Platform
> >> ( ) Kernel low-level debugging on Kvmtool Virtual Platform
> >>. above repeated 
> >>
> >> The virtual addresses selected follow arm reference models, high in 
> >> vmalloc 
> >> section with high mem enabled and guest running with >= 1GB of memory. The 
> >> offset is leftover from arm reference models.
> > 
> > Which model? It doesn't appear to match the vexpress AEM/RTSM/FVP/whatever
> > which used 0x1c09 for UART0.
> 
> I recall QEMU virt model had its own physical address map, for sure I saw the
> virtio-mmio regions assigned in some ARM document. Peter would you know?
> 
> As far as kvmtool I'm not sure, currently PC1 COM1 port is used? Andre will 
> that
> stay fixed?

We make absolutely no guarantees about the memory map provided by kvmtool.

Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 1/8] vfio: platform: introduce vfio-platform-base module

2015-11-03 Thread Eric Auger
To prepare for vfio platform reset rework let's build
vfio_platform_common.c and vfio_platform_irq.c in a separate
module from vfio-platform and vfio-amba. This makes possible
to have separate module inits and works around a race between
platform driver init and vfio reset module init: that way we
make sure symbols exported by base are available when vfio-platform
driver gets probed.

The open/release being implemented in the base module, the ref
count is applied to the parent module instead.

Signed-off-by: Eric Auger 
Suggested-by: Arnd Bergmann 
Reviewed-by: Arnd Bergmann 

---
v3 -> v4:
- add Arnd R-b

v3: creation
---
 drivers/vfio/platform/Makefile|  6 --
 drivers/vfio/platform/vfio_amba.c |  1 +
 drivers/vfio/platform/vfio_platform.c |  1 +
 drivers/vfio/platform/vfio_platform_common.c  | 13 +++--
 drivers/vfio/platform/vfio_platform_private.h |  1 +
 5 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile
index 9ce8afe..41a6224 100644
--- a/drivers/vfio/platform/Makefile
+++ b/drivers/vfio/platform/Makefile
@@ -1,10 +1,12 @@
-
-vfio-platform-y := vfio_platform.o vfio_platform_common.o vfio_platform_irq.o
+vfio-platform-base-y := vfio_platform_common.o vfio_platform_irq.o
+vfio-platform-y := vfio_platform.o
 
 obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o
+obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform-base.o
 obj-$(CONFIG_VFIO_PLATFORM) += reset/
 
 vfio-amba-y := vfio_amba.o
 
 obj-$(CONFIG_VFIO_AMBA) += vfio-amba.o
+obj-$(CONFIG_VFIO_AMBA) += vfio-platform-base.o
 obj-$(CONFIG_VFIO_AMBA) += reset/
diff --git a/drivers/vfio/platform/vfio_amba.c 
b/drivers/vfio/platform/vfio_amba.c
index ff0331f..a66479b 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -67,6 +67,7 @@ static int vfio_amba_probe(struct amba_device *adev, const 
struct amba_id *id)
vdev->flags = VFIO_DEVICE_FLAGS_AMBA;
vdev->get_resource = get_amba_resource;
vdev->get_irq = get_amba_irq;
+   vdev->parent_module = THIS_MODULE;
 
ret = vfio_platform_probe_common(vdev, >dev);
if (ret) {
diff --git a/drivers/vfio/platform/vfio_platform.c 
b/drivers/vfio/platform/vfio_platform.c
index cef645c..f1625dc 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -65,6 +65,7 @@ static int vfio_platform_probe(struct platform_device *pdev)
vdev->flags = VFIO_DEVICE_FLAGS_PLATFORM;
vdev->get_resource = get_platform_resource;
vdev->get_irq = get_platform_irq;
+   vdev->parent_module = THIS_MODULE;
 
ret = vfio_platform_probe_common(vdev, >dev);
if (ret)
diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index e43efb5..184e9d2 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -23,6 +23,10 @@
 
 #include "vfio_platform_private.h"
 
+#define DRIVER_VERSION  "0.10"
+#define DRIVER_AUTHOR   "Antonios Motakis "
+#define DRIVER_DESC "VFIO platform base module"
+
 static DEFINE_MUTEX(driver_lock);
 
 static const struct vfio_platform_reset_combo reset_lookup_table[] = {
@@ -146,7 +150,7 @@ static void vfio_platform_release(void *device_data)
 
mutex_unlock(_lock);
 
-   module_put(THIS_MODULE);
+   module_put(vdev->parent_module);
 }
 
 static int vfio_platform_open(void *device_data)
@@ -154,7 +158,7 @@ static int vfio_platform_open(void *device_data)
struct vfio_platform_device *vdev = device_data;
int ret;
 
-   if (!try_module_get(THIS_MODULE))
+   if (!try_module_get(vdev->parent_module))
return -ENODEV;
 
mutex_lock(_lock);
@@ -573,3 +577,8 @@ struct vfio_platform_device 
*vfio_platform_remove_common(struct device *dev)
return vdev;
 }
 EXPORT_SYMBOL_GPL(vfio_platform_remove_common);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vfio/platform/vfio_platform_private.h 
b/drivers/vfio/platform/vfio_platform_private.h
index 1c9b3d5..7128690 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -56,6 +56,7 @@ struct vfio_platform_device {
u32 num_irqs;
int refcnt;
struct mutexigate;
+   struct module   *parent_module;
 
/*
 * These fields should be filled by the bus specific binder
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 5/8] vfio: platform: add compat in vfio_platform_device

2015-11-03 Thread Eric Auger
Let's retrieve the compatibility string on probe and store it
in the vfio_platform_device struct

Signed-off-by: Eric Auger 

---

v2 -> v3:
- populate compat after vdev check
---
 drivers/vfio/platform/vfio_platform_common.c  | 15 ---
 drivers/vfio/platform/vfio_platform_private.h |  1 +
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index 3b7e52c..f2d41a0 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -41,16 +41,11 @@ static const struct vfio_platform_reset_combo 
reset_lookup_table[] = {
 static void vfio_platform_get_reset(struct vfio_platform_device *vdev,
struct device *dev)
 {
-   const char *compat;
int (*reset)(struct vfio_platform_device *);
-   int ret, i;
-
-   ret = device_property_read_string(dev, "compatible", );
-   if (ret)
-   return;
+   int i;
 
for (i = 0 ; i < ARRAY_SIZE(reset_lookup_table); i++) {
-   if (!strcmp(reset_lookup_table[i].compat, compat)) {
+   if (!strcmp(reset_lookup_table[i].compat, vdev->compat)) {
request_module(reset_lookup_table[i].module_name);
reset = __symbol_get(
reset_lookup_table[i].reset_function_name);
@@ -544,6 +539,12 @@ int vfio_platform_probe_common(struct vfio_platform_device 
*vdev,
if (!vdev)
return -EINVAL;
 
+   ret = device_property_read_string(dev, "compatible", >compat);
+   if (ret) {
+   pr_err("VFIO: cannot retrieve compat for %s\n", vdev->name);
+   return -EINVAL;
+   }
+
group = iommu_group_get(dev);
if (!group) {
pr_err("VFIO: No IOMMU group for device %s\n", vdev->name);
diff --git a/drivers/vfio/platform/vfio_platform_private.h 
b/drivers/vfio/platform/vfio_platform_private.h
index fd262be..415310f 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -57,6 +57,7 @@ struct vfio_platform_device {
int refcnt;
struct mutexigate;
struct module   *parent_module;
+   const char  *compat;
 
/*
 * These fields should be filled by the bus specific binder
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM/arm: kernel low level debug support for ARM32 virtual platforms

2015-11-03 Thread Rob Herring
On Tue, Nov 3, 2015 at 1:17 PM, Mario Smarduch  wrote:
>
>
> On 11/3/2015 9:55 AM, Will Deacon wrote:
>> On Tue, Nov 03, 2015 at 09:44:52AM -0800, Mario Smarduch wrote:
>>> On 11/3/2015 8:33 AM, Christopher Covington wrote:
 On 11/02/2015 06:51 PM, Mario Smarduch wrote:
>this is a re-post from couple weeks ago, please take time to review 
> this
> simple patch which simplifies DEBUG_LL and prevents kernel crash on 
> virtual
> platforms.
>
> Before this patch DEBUG_LL for 'dummy virtual machine':
>
> ( ) Kernel low-level debugging via EmbeddedICE DCC channel
> ( ) Kernel low-level debug output via semihosting I/O
> ( ) Kernel low-level debugging via 8250 UART
> ( ) Kernel low-level debugging via ARM Ltd PL01x Primecell
>
> In summary if debug uart is not emulated kernel crashes.
> And once you pass that hurdle, uart physical/virtual addresses are 
> unknown.
> DEBUG_LL comes in handy on many occasions and should be somewhat
> intuitive to use like it is for physical platforms. For virtual platforms
> user may start doubting the host and get into a bigger mess.
>
> After this patch is applied user gets:
>
> (X) Kernel low-level debugging on QEMU Virtual Platform
> ( ) Kernel low-level debugging on Kvmtool Virtual Platform
>. above repeated 
>
> The virtual addresses selected follow arm reference models, high in 
> vmalloc
> section with high mem enabled and guest running with >= 1GB of memory. The
> offset is leftover from arm reference models.

 Which model? It doesn't appear to match the vexpress AEM/RTSM/FVP/whatever
 which used 0x1c09 for UART0.
>>>
>>> I recall QEMU virt model had its own physical address map, for sure I saw 
>>> the
>>> virtio-mmio regions assigned in some ARM document. Peter would you know?
>>>
>>> As far as kvmtool I'm not sure, currently PC1 COM1 port is used? Andre will 
>>> that
>>> stay fixed?
>>
>> We make absolutely no guarantees about the memory map provided by kvmtool.
>>
>> Will
>>
>
> If that's also the case for qemu, then I guess the best you can do is find a 
> way
> to dump the device tree. Find the uart, physical address and try figure out 
> the
> virtual address.
>
> Pretty involved, hoped for something more automated since that's a handy 
> feature.

You really only need LL_DEBUG now if you are debugging very early code
before memory is setup and/or bad memory. Use earlycon instead which
should already be supported both via the pl011 or semihosting. I used
it with QEMU semihosting support.

Rob
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH/RFC 0/4] dma ops and virtio

2015-11-03 Thread Cornelia Huck
On Mon, 2 Nov 2015 12:23:25 -0800
Andy Lutomirski  wrote:

> No change.

I'm stumped :(

Here's what I see:

(...)

> CH DBG: css_interpret_ccw: cmd_code=e4
> CH DBG: css_interpret_ccw: ret=0

sense id -> works

(...)

> CH DBG: css_interpret_ccw: cmd_code=3
> CH DBG: css_interpret_ccw: ret=0

nop (path verification) -> works

> CH DBG: css_interpret_ccw: cmd_code=83
> CH DBG: css_interpret_ccw: ret=-38

set revision -> -ENOSYS

This is fine; the virtio device is in legacy mode and the kernel will
try revision 1; qemu will reject this. The code should end up
generating a unit check with command reject, however...

> qeth: register layer 2 discipline
> qeth: register layer 3 discipline
> oprofile: using timer interrupt.
> NET: Registered protocol family 10
> virtio_ccw 0.0.: Failed to set online: -5

...this shows the kernel driver somehow did not end up with that
command reject (it would have triggered a retry with revision 0, and
the return code shows that no unit check/command reject was detected,
but some other error.)

> The lack of much interesting output makes me think that maybe I
> misconfigured something.

It's just failing very early in the setup phase. As it works for me
with a kvm setup, I'm suspecting some error in qemu's emulation code,
which is unfortunately not my turf.

Some more poke-around-in-the-dark ideas:

- Do you get more debug output when you switch back to s390-ccw-virtio
(virtio-1), i.e. does cmd 83 work and is it followed by further
commands?

- Can you try with the following qemu logging patch

-8<--8<-

diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index c033612..80853a6 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -868,6 +868,7 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int 
*irb_len)
 PMCW *p = >curr_status.pmcw;
 uint16_t stctl;
 IRB irb;
+int i;
 
 if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) {
 return 3;
@@ -898,6 +899,14 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, 
int *irb_len)
 }
 }
 /* Store the irb to the guest. */
+fprintf(stderr, "CH DBG: %s: flags=%04x ctrl=%04x cpa=%08x\n",
+__func__, irb.scsw.flags, irb.scsw.ctrl, irb.scsw.cpa);
+fprintf(stderr, "CH DBG: %s: dstat=%02x cstat=%02x count=%04x\n",
+__func__, irb.scsw.dstat, irb.scsw.cstat, irb.scsw.count);
+for (i = 0; i < ARRAY_SIZE(irb.ecw); i++) {
+fprintf(stderr, "CH DBG: %s: ecw[%d]=%08x\n", __func__,
+i, irb.ecw[i]);
+}
 copy_irb_to_guest(target_irb, , p, irb_len);
 
 return ((stctl & SCSW_STCTL_STATUS_PEND) == 0);

-8<--8<-

and the following kernel patch

-8<--8<-

diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 83da53c..ea4db09 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -540,6 +540,9 @@ callback:
create_fake_irb(>private->irb,
cdev->private->flags.fake_irb);
cdev->private->flags.fake_irb = 0;
+   CIO_TRACE_EVENT(0, "fake_irb");
+   CIO_HEX_EVENT(0, >private->irb,
+ sizeof(struct irb));
if (cdev->handler)
cdev->handler(cdev, cdev->private->intparm,
  >private->irb);
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 6acd0b5..e9bf357 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -446,6 +446,8 @@ ccw_device_call_handler(struct ccw_device *cdev)
/*
 * Now we are ready to call the device driver interrupt handler.
 */
+   CIO_TRACE_EVENT(0, "irb");
+   CIO_HEX_EVENT(0, >private->irb, sizeof(struct irb));
if (cdev->handler)
cdev->handler(cdev, cdev->private->intparm,
  >private->irb);

-8<--8<-

Just to verify that qemu will produce and the kernel end up with the
irb I'd expect. I'd rather prefer us getting the dma stuff right
instead of chasing qemu issues :/

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v1 2/2] dma-mapping-common: add DMA attribute - DMA_ATTR_IOMMU_BYPASS

2015-11-03 Thread Benjamin Herrenschmidt
On Tue, 2015-11-03 at 14:11 +0100, Christoph Hellwig wrote:
> > xHCI for example, vs. something like 10G ethernet... but yes I agree it
> > sucks. I don't like that sort of policy anywhere in drivers. On the
> > other hand the platform doesn't have much information to make that sort
> > of decision either.
> 
> Maybe because it should simply use what's optimal?  E.g. passthrough
> whenever possible, where arguments against possible are:  dma_mask, vfio
> requirements, kernel command line option. 

Right this is what I do today on powerpc with the exception of
the command line option.

>  This is what a lot of
> architectures already do, I remember the SGI Origin / Altix code has the
> same behavior as well.  Those IOMMUs already had the 64 bit passthrough
> and 32-bit sliding window in addition to the real IOMMU 10 years ago.
> --
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM/arm: kernel low level debug support for ARM32 virtual platforms

2015-11-03 Thread Russell King - ARM Linux
On Tue, Nov 03, 2015 at 11:33:17AM -0500, Christopher Covington wrote:
> > diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
> > index a2e16f9..d126bd4 100644
> > --- a/arch/arm/Kconfig.debug
> > +++ b/arch/arm/Kconfig.debug
> > @@ -1155,6 +1155,28 @@ choice
> >   This option selects UART0 on VIA/Wondermedia System-on-a-chip
> >   devices, including VT8500, WM8505, WM8650 and WM8850.
> >  
> > +   config DEBUG_VIRT_UART_QEMU
> > +   bool "Kernel low-level debugging on QEMU Virtual Platform"
> > +   depends on ARCH_VIRT
> > +   select DEBUG_UART_PL01X
> > +   help
> > + Say Y here if you want the debug print routines to direct
> > + their output to PL011 UART port on QEMU Virtual Platform.
> > + Appropriate address values are:
> > +   PHYS        VIRT
> > +   0x9000000   0xf8090000
> 
> I thought the only guarantee the virt machine had about the memory map was
> that it would be described in the device tree.

This LL debug stuff is used prior to device tree being parsed.

-- 
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH/RFC 0/4] dma ops and virtio

2015-11-03 Thread Andy Lutomirski
On Tue, Nov 3, 2015 at 9:59 AM, Cornelia Huck  wrote:
> It's just failing very early in the setup phase. As it works for me
> with a kvm setup, I'm suspecting some error in qemu's emulation code,
> which is unfortunately not my turf.
>

That should be easy to rule out.  Can you try with -machine accel=tcg?
 I can't test with kvm for obvious reasons.

> Some more poke-around-in-the-dark ideas:
>
> - Do you get more debug output when you switch back to s390-ccw-virtio
> (virtio-1), i.e. does cmd 83 work and is it followed by further
> commands?

I'll play with this stuff later today.

--Andy
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM/arm: kernel low level debug support for ARM32 virtual platforms

2015-11-03 Thread Mario Smarduch


On 11/3/2015 9:55 AM, Will Deacon wrote:
> On Tue, Nov 03, 2015 at 09:44:52AM -0800, Mario Smarduch wrote:
>> On 11/3/2015 8:33 AM, Christopher Covington wrote:
>>> On 11/02/2015 06:51 PM, Mario Smarduch wrote:
this is a re-post from couple weeks ago, please take time to review 
 this 
 simple patch which simplifies DEBUG_LL and prevents kernel crash on 
 virtual 
 platforms.

 Before this patch DEBUG_LL for 'dummy virtual machine':

 ( ) Kernel low-level debugging via EmbeddedICE DCC channel
 ( ) Kernel low-level debug output via semihosting I/O
 ( ) Kernel low-level debugging via 8250 UART
 ( ) Kernel low-level debugging via ARM Ltd PL01x Primecell

 In summary if debug uart is not emulated kernel crashes.
 And once you pass that hurdle, uart physical/virtual addresses are unknown.
 DEBUG_LL comes in handy on many occasions and should be somewhat 
 intuitive to use like it is for physical platforms. For virtual platforms
 user may start doubting the host and get into a bigger mess.

 After this patch is applied user gets:

 (X) Kernel low-level debugging on QEMU Virtual Platform
 ( ) Kernel low-level debugging on Kvmtool Virtual Platform
. above repeated 

 The virtual addresses selected follow arm reference models, high in 
 vmalloc 
 section with high mem enabled and guest running with >= 1GB of memory. The 
 offset is leftover from arm reference models.
>>>
>>> Which model? It doesn't appear to match the vexpress AEM/RTSM/FVP/whatever
>>> which used 0x1c09 for UART0.
>>
>> I recall QEMU virt model had its own physical address map, for sure I saw 
>> the
>> virtio-mmio regions assigned in some ARM document. Peter would you know?
>>
>> As far as kvmtool I'm not sure, currently PC1 COM1 port is used? Andre will 
>> that
>> stay fixed?
> 
> We make absolutely no guarantees about the memory map provided by kvmtool.
> 
> Will
> 

If that's also the case for qemu, then I guess the best you can do is find a way
to dump the device tree. Find the uart, physical address and try figure out the
virtual address.

Pretty involved, hoped for something more automated since that's a handy 
feature.

Thanks,
- Mario.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 2/8] vfio: platform: add capability to register a reset function

2015-11-03 Thread Eric Auger
In preparation for subsequent changes in reset function lookup,
let's introduce a dynamic list of reset combos (compat string,
reset module, reset function). The list can be populated/voided with
vfio_platform_register/unregister_reset. Those are not yet used in
this patch.

Signed-off-by: Eric Auger 
Reviewed-by: Arnd Bergmann 

---

v4 -> v5:
- add Arnd's R-b

v3 -> v4:
- __vfio_platform_register_reset does not return any value anymore
- vfio_platform_unregister_reset also takes the reset function pointer
  as parameter

v2 -> v3:
- use goto out to have a single mutex_unlock
- implement vfio_platform_register_reset as a macro (suggested by Arnd)
- move reset_node struct declaration back to vfio_platform_private.h
- vfio_platform_unregister_reset does not return any value anymore

v1 -> v2:
- reset_list becomes static
- vfio_platform_register/unregister_reset take a const char * as compat
- fix node leak
- add reset_lock to protect the reset list manipulation
- move vfio_platform_reset_node declaration in vfio_platform_common.c
---
 drivers/vfio/platform/vfio_platform_common.c  | 27 +++
 drivers/vfio/platform/vfio_platform_private.h | 20 
 2 files changed, 47 insertions(+)

diff --git a/drivers/vfio/platform/vfio_platform_common.c 
b/drivers/vfio/platform/vfio_platform_common.c
index 184e9d2..3b7e52c 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -27,6 +27,7 @@
 #define DRIVER_AUTHOR   "Antonios Motakis "
 #define DRIVER_DESC "VFIO platform base module"
 
+static LIST_HEAD(reset_list);
 static DEFINE_MUTEX(driver_lock);
 
 static const struct vfio_platform_reset_combo reset_lookup_table[] = {
@@ -578,6 +579,32 @@ struct vfio_platform_device 
*vfio_platform_remove_common(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(vfio_platform_remove_common);
 
+void __vfio_platform_register_reset(struct vfio_platform_reset_node *node)
+{
+   mutex_lock(&driver_lock);
+   list_add(&node->link, &reset_list);
+   mutex_unlock(&driver_lock);
+}
+EXPORT_SYMBOL_GPL(__vfio_platform_register_reset);
+
+void vfio_platform_unregister_reset(const char *compat,
+   vfio_platform_reset_fn_t fn)
+{
+   struct vfio_platform_reset_node *iter, *temp;
+
+   mutex_lock(&driver_lock);
+   list_for_each_entry_safe(iter, temp, &reset_list, link) {
+   if (!strcmp(iter->compat, compat) && (iter->reset == fn)) {
+   list_del(&iter->link);
+   break;
+   }
+   }
+
+   mutex_unlock(&driver_lock);
+
+}
+EXPORT_SYMBOL_GPL(vfio_platform_unregister_reset);
+
 MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR(DRIVER_AUTHOR);
diff --git a/drivers/vfio/platform/vfio_platform_private.h 
b/drivers/vfio/platform/vfio_platform_private.h
index 7128690..c563940 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -71,6 +71,15 @@ struct vfio_platform_device {
int (*reset)(struct vfio_platform_device *vdev);
 };
 
+typedef int (*vfio_platform_reset_fn_t)(struct vfio_platform_device *vdev);
+
+struct vfio_platform_reset_node {
+   struct list_head link;
+   char *compat;
+   struct module *owner;
+   vfio_platform_reset_fn_t reset;
+};
+
 struct vfio_platform_reset_combo {
const char *compat;
const char *reset_function_name;
@@ -90,4 +99,15 @@ extern int vfio_platform_set_irqs_ioctl(struct 
vfio_platform_device *vdev,
unsigned start, unsigned count,
void *data);
 
+extern void __vfio_platform_register_reset(struct vfio_platform_reset_node *n);
+extern void vfio_platform_unregister_reset(const char *compat,
+  vfio_platform_reset_fn_t fn);
+#define vfio_platform_register_reset(__compat, __reset)\
+static struct vfio_platform_reset_node __reset ## _node = {\
+   .owner = THIS_MODULE,   \
+   .compat = __compat, \
+   .reset = __reset,   \
+}; \
+__vfio_platform_register_reset(&__reset ## _node)
+
 #endif /* VFIO_PLATFORM_PRIVATE_H */
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 0/8] VFIO platform reset module rework

2015-11-03 Thread Eric Auger
This series fixes the current implementation by getting rid of the
usage of __symbol_get which caused a compilation issue with
CONFIG_MODULES disabled. On top of this, the usage of MODULE_ALIAS makes
possible to add a new reset module without being obliged to update the
framework. The new implementation relies on the reset module registering
its reset function to the vfio-platform driver.

The series is available at

https://git.linaro.org/people/eric.auger/linux.git/shortlog/refs/heads/v4.3-rework-v6

Best Regards

Eric

v5 -> v6:
- add "vfio: platform: reset: calxedaxgmac: fix ioaddr leak"

v4 -> v5:
- no code change
- only added Arnd's new R-b

v3 -> v4:
- Remove the EXPORT_SYMBOL_GPL(vfio_platform_calxedaxgmac_reset) later
  in [6/7], to keep the functionality working all along the series
- Add Arnd R-b (I dared to keep them despite the above change)
- vfio_platform_unregister_reset gets the reset function to do a double
  check on the compat and the function pointer too
- __vfio_platform_register_reset turned to 'void'

v2 -> v3:
- use driver_mutex instead of reset_mutex
- style fixes: single mutex_unlock
- use static nodes; vfio_platform_register_reset now is a macro
- vfio_platform_reset_private.h removed since reset_module_(un)register
  disappear. No use of symbol_get anymore.
- new patch introducing vfio-platform-base
- reset look-up moved back at vfio-platform probe time
- new patch featuring dev_info/dev_warn

v1 -> v2:
* in vfio_platform_common.c:
  - move reset lookup at load time and put reset at release: this is to
prevent a race between the 2 module loads
  - reset_list becomes static
  - vfio_platform_register/unregister_reset take a const char * as compat
  - fix node link
  - remove old combo struct and cleanup proto of vfio_platform_get_reset
  - add mutex to protect the reset list
* in calxeda xgmac reset module
  - introduce vfio_platform_reset_private.h
  - use module_vfio_reset_handler macro
  - do not export vfio_platform_calxedaxgmac_reset symbol anymore
  - add a pr_info to show the device is reset by vfio reset module



Eric Auger (8):
  vfio: platform: introduce vfio-platform-base module
  vfio: platform: add capability to register a reset function
  vfio: platform: introduce module_vfio_reset_handler macro
  vfio: platform: reset: calxedaxgmac: add reset function registration
  vfio: platform: add compat in vfio_platform_device
  vfio: platform: use list of registered reset function
  vfio: platform: add dev_info on device reset
  vfio: platform: reset: calxedaxgmac: fix ioaddr leak

 drivers/vfio/platform/Makefile |   6 +-
 .../platform/reset/vfio_platform_calxedaxgmac.c|  19 ++--
 drivers/vfio/platform/vfio_amba.c  |   1 +
 drivers/vfio/platform/vfio_platform.c  |   1 +
 drivers/vfio/platform/vfio_platform_common.c   | 119 +++--
 drivers/vfio/platform/vfio_platform_private.h  |  40 ++-
 6 files changed, 137 insertions(+), 49 deletions(-)

-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5] VFIO: platform: reset: AMD xgbe reset module

2015-11-03 Thread Eric Auger
This patch introduces a module that registers and implements a low-level
reset function for the AMD XGBE device.

It performs the following actions:
- reset the PHY
- disable auto-negotiation
- disable & clear auto-negotiation IRQ
- soft-reset the MAC

Those tiny pieces of code are inherited from the native xgbe driver.

Signed-off-by: Eric Auger 
Reviewed-by: Arnd Bergmann 

---

Applies on top of [PATCH v6 0/8] VFIO platform reset module rework

v4 -> v5:
- fix ioaddr leak
  iounmap is done in vfio platform driver (vfio_platform_regions_cleanup)
  after reset (vfio_platform_release)

v3 -> v4:
- add Arnd's R-b

v2 -> v3:
- in Kconfig, add empty line between the 2 options
- remove DRIVER_VERSION, DRIVER_AUTHOR and DRIVER_DESC and put
  strings directly in MODULE macros

v1 -> v2:
- uses module_vfio_reset_handler macro
---
 drivers/vfio/platform/reset/Kconfig|   8 ++
 drivers/vfio/platform/reset/Makefile   |   2 +
 .../vfio/platform/reset/vfio_platform_amdxgbe.c| 127 +
 3 files changed, 137 insertions(+)
 create mode 100644 drivers/vfio/platform/reset/vfio_platform_amdxgbe.c

diff --git a/drivers/vfio/platform/reset/Kconfig 
b/drivers/vfio/platform/reset/Kconfig
index 746b96b..705 100644
--- a/drivers/vfio/platform/reset/Kconfig
+++ b/drivers/vfio/platform/reset/Kconfig
@@ -5,3 +5,11 @@ config VFIO_PLATFORM_CALXEDAXGMAC_RESET
  Enables the VFIO platform driver to handle reset for Calxeda xgmac
 
  If you don't know what to do here, say N.
+
+config VFIO_PLATFORM_AMDXGBE_RESET
+   tristate "VFIO support for AMD XGBE reset"
+   depends on VFIO_PLATFORM
+   help
+ Enables the VFIO platform driver to handle reset for AMD XGBE
+
+ If you don't know what to do here, say N.
diff --git a/drivers/vfio/platform/reset/Makefile 
b/drivers/vfio/platform/reset/Makefile
index 2a486af..93f4e23 100644
--- a/drivers/vfio/platform/reset/Makefile
+++ b/drivers/vfio/platform/reset/Makefile
@@ -1,5 +1,7 @@
 vfio-platform-calxedaxgmac-y := vfio_platform_calxedaxgmac.o
+vfio-platform-amdxgbe-y := vfio_platform_amdxgbe.o
 
 ccflags-y += -Idrivers/vfio/platform
 
 obj-$(CONFIG_VFIO_PLATFORM_CALXEDAXGMAC_RESET) += vfio-platform-calxedaxgmac.o
+obj-$(CONFIG_VFIO_PLATFORM_AMDXGBE_RESET) += vfio-platform-amdxgbe.o
diff --git a/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c 
b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c
new file mode 100644
index 000..da5356f
--- /dev/null
+++ b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c
@@ -0,0 +1,127 @@
+/*
+ * VFIO platform driver specialized for AMD xgbe reset
+ * reset code is inherited from AMD xgbe native driver
+ *
+ * Copyright (c) 2015 Linaro Ltd.
+ *  www.linaro.org
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "vfio_platform_private.h"
+
+#define DMA_MR 0x3000
+#define MAC_VR 0x0110
+#define DMA_ISR0x3008
+#define MAC_ISR0x00b0
+#define PCS_MMD_SELECT 0xff
+#define MDIO_AN_INT0x8002
+#define MDIO_AN_INTMASK0x8001
+
+static unsigned int xmdio_read(void *ioaddr, unsigned int mmd,
+  unsigned int reg)
+{
+   unsigned int mmd_address, value;
+
+   mmd_address = (mmd << 16) | ((reg) & 0xffff);
+   iowrite32(mmd_address >> 8, ioaddr + (PCS_MMD_SELECT << 2));
+   value = ioread32(ioaddr + ((mmd_address & 0xff) << 2));
+   return value;
+}
+
+static void xmdio_write(void *ioaddr, unsigned int mmd,
+   unsigned int reg, unsigned int value)
+{
+   unsigned int mmd_address;
+
+   mmd_address = (mmd << 16) | ((reg) & 0xffff);
+   iowrite32(mmd_address >> 8, ioaddr + (PCS_MMD_SELECT << 2));
+   iowrite32(value, ioaddr + ((mmd_address & 0xff) << 2));
+}
+
+int vfio_platform_amdxgbe_reset(struct vfio_platform_device *vdev)
+{
+   struct vfio_platform_region *xgmac_regs = &vdev->regions[0];
+   struct vfio_platform_region *xpcs_regs = &vdev->regions[1];
+   u32 dma_mr_value, pcs_value, value;
+   unsigned int count;
+
+   if (!xgmac_regs->ioaddr) {
+   xgmac_regs->ioaddr =
+   ioremap_nocache(xgmac_regs->addr, xgmac_regs->size);
+   if 

[PATCH] KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0()

2015-11-03 Thread Laszlo Ersek
Commit b18d5431acc7 ("KVM: x86: fix CR0.CD virtualization") was
technically correct, but it broke OVMF guests by slowing down various
parts of the firmware.

Commit fb279950ba02 ("KVM: vmx: obey KVM_QUIRK_CD_NW_CLEARED") quirked the
first function modified by b18d5431acc7, vmx_get_mt_mask(), for OVMF's
sake. This restored the speed of the OVMF code that runs before
PlatformPei (including the memory intensive LZMA decompression in SEC).

This patch extends the quirk to the second function modified by
b18d5431acc7, kvm_set_cr0(). It eliminates the intrusive slowdown that
hits the EFI_MP_SERVICES_PROTOCOL implementation of edk2's
UefiCpuPkg/CpuDxe -- which is built into OVMF --, when CpuDxe starts up
all APs at once for initialization, in order to count them.

We also carry over the kvm_arch_has_noncoherent_dma() sub-condition from
the other half of the original commit b18d5431acc7.

Cc: Paolo Bonzini 
Cc: Jordan Justen 
Cc: Janusz Mocek 
Cc: Alex Williamson 
Cc: Xiao Guangrong 
Signed-off-by: Laszlo Ersek 
---
 arch/x86/kvm/x86.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a24bae0..30723a4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -625,7 +625,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if ((cr0 ^ old_cr0) & update_bits)
kvm_mmu_reset_context(vcpu);
 
-   if ((cr0 ^ old_cr0) & X86_CR0_CD)
+   if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
+   kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
+   !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
 
return 0;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[vfio:next 15/15] drivers/vfio/vfio.c:1002:32-35: ERROR: driver is NULL but dereferenced.

2015-11-03 Thread kbuild test robot
tree:   https://github.com/awilliam/linux-vfio.git next
head:   f54e0bcf65bf7d2015e1aa1c6353e5feb71c3654
commit: f54e0bcf65bf7d2015e1aa1c6353e5feb71c3654 [15/15] vfio: Include No-IOMMU 
mode


coccinelle warnings: (new ones prefixed by >>)

>> drivers/vfio/vfio.c:1002:32-35: ERROR: driver is NULL but dereferenced.

vim +1002 drivers/vfio/vfio.c

0b43c082 Alex Williamson 2013-04-29   986   
down_read(&container->group_lock);
0b43c082 Alex Williamson 2013-04-29   987  
0b43c082 Alex Williamson 2013-04-29   988   driver = 
container->iommu_driver;
0b43c082 Alex Williamson 2013-04-29   989  
cba3345c Alex Williamson 2012-07-31   990   switch (arg) {
cba3345c Alex Williamson 2012-07-31   991   /* No base extensions 
yet */
cba3345c Alex Williamson 2012-07-31   992   default:
cba3345c Alex Williamson 2012-07-31   993   /*
cba3345c Alex Williamson 2012-07-31   994* If no driver is set, 
poll all registered drivers for
cba3345c Alex Williamson 2012-07-31   995* extensions and 
return the first positive result.  If
cba3345c Alex Williamson 2012-07-31   996* a driver is already 
set, further queries will be passed
cba3345c Alex Williamson 2012-07-31   997* only to that driver.
cba3345c Alex Williamson 2012-07-31   998*/
cba3345c Alex Williamson 2012-07-31   999   if (!driver) {
cba3345c Alex Williamson 2012-07-31  1000   
mutex_lock(_drivers_lock);
f54e0bcf Alex Williamson 2015-10-15  1001   
vfio_for_each_iommu_driver(container, driver) {
cba3345c Alex Williamson 2012-07-31 @1002   if 
(!try_module_get(driver->ops->owner))
cba3345c Alex Williamson 2012-07-31  1003   
continue;
cba3345c Alex Williamson 2012-07-31  1004  
cba3345c Alex Williamson 2012-07-31  1005   ret = 
driver->ops->ioctl(NULL,
cba3345c Alex Williamson 2012-07-31  1006   
 VFIO_CHECK_EXTENSION,
cba3345c Alex Williamson 2012-07-31  1007   
 arg);
cba3345c Alex Williamson 2012-07-31  1008   
module_put(driver->ops->owner);
cba3345c Alex Williamson 2012-07-31  1009   if (ret 
> 0)
cba3345c Alex Williamson 2012-07-31  1010   
break;

:: The code at line 1002 was first introduced by commit
:: cba3345cc494ad286ca8823f44b2c16cae496679 vfio: VFIO core

:: TO: Alex Williamson 
:: CC: Alex Williamson 

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 08/35] exec: allow memory to be allocated from any kind of path

2015-11-03 Thread Eduardo Habkost
On Mon, Nov 02, 2015 at 05:13:10PM +0800, Xiao Guangrong wrote:
> Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
> of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
> locates at DAX enabled filesystem
> 
> So this patch let it work on any kind of path
> 
> Signed-off-by: Xiao Guangrong 
> ---
>  exec.c | 24 
>  1 file changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 9de38be..9075f4d 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1184,25 +1184,25 @@ static void *file_ram_alloc(RAMBlock *block,
>  char *c;
>  void *area;
>  int fd;
> -uint64_t hpagesize;
> +uint64_t pagesize;
>  Error *local_err = NULL;
>  
> -hpagesize = qemu_file_get_page_size(path, &local_err);
> +pagesize = qemu_file_get_page_size(path, &local_err);
>  if (local_err) {
>  error_propagate(errp, local_err);
>  goto error;
>  }
>  
> -if (hpagesize == getpagesize()) {
> -fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> +if (pagesize == getpagesize()) {
> +fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");

If the point of this patch is to allow file_ram_alloc() to not be
specific to hugetlbfs anymore, this warning can simply go away.

(And in case if you really want to keep the warning, I don't see the
point of the changes you made to it.)

-- 
Eduardo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: irqchip: fix memory leak

2015-11-03 Thread William Dauchy
Hi Paolo,

I was wondering if this could be a valid candidate for -stable, don't you think?
(commit ba60c41)

Best regards,

On Sep02 12:33, Sudip Mukherjee wrote:
> We were taking the exit path after checking ue->flags and return value
> of setup_routing_entry(), but 'e' was not freed in case of a failure.
> 
> Signed-off-by: Sudip Mukherjee 
> ---
>  virt/kvm/irqchip.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
> index 21c1424..c63e54f 100644
> --- a/virt/kvm/irqchip.c
> +++ b/virt/kvm/irqchip.c
> @@ -213,11 +213,15 @@ int kvm_set_irq_routing(struct kvm *kvm,
>   goto out;
>  
>   r = -EINVAL;
> - if (ue->flags)
> + if (ue->flags) {
> + kfree(e);
>   goto out;
> + }
>   r = setup_routing_entry(new, e, ue);
> - if (r)
> + if (r) {
> + kfree(e);
>   goto out;
> + }
>   ++ue;
>   }
>  
> -- 
> 1.9.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
William


signature.asc
Description: PGP signature


Re: [PATCH v7 11/35] util: introduce qemu_file_getlength()

2015-11-03 Thread Eduardo Habkost
On Mon, Nov 02, 2015 at 05:13:13PM +0800, Xiao Guangrong wrote:
[...]
> +size_t qemu_file_getlength(const char *file, Error **errp)
> +{
> +int64_t size;
[...]
> +return size;

Can you guarantee that SIZE_MAX >= INT64_MAX on all platforms supported
by QEMU?

-- 
Eduardo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0()

2015-11-03 Thread Janusz Mocek
W dniu 03.11.2015 o 19:57, Laszlo Ersek pisze:
> On 11/03/15 19:34, Laszlo Ersek wrote:
>> Commit b18d5431acc7 ("KVM: x86: fix CR0.CD virtualization") was
>> technically correct, but it broke OVMF guests by slowing down various
>> parts of the firmware.
>>
>> Commit fb279950ba02 ("KVM: vmx: obey KVM_QUIRK_CD_NW_CLEARED") quirked the
>> first function modified by b18d5431acc7, vmx_get_mt_mask(), for OVMF's
>> sake. This restored the speed of the OVMF code that runs before
>> PlatformPei (including the memory intensive LZMA decompression in SEC).
>>
>> This patch extends the quirk to the second function modified by
>> b18d5431acc7, kvm_set_cr0(). It eliminates the intrusive slowdown that
>> hits the EFI_MP_SERVICES_PROTOCOL implementation of edk2's
>> UefiCpuPkg/CpuDxe -- which is built into OVMF --, when CpuDxe starts up
>> all APs at once for initialization, in order to count them.
>>
>> We also carry over the kvm_arch_has_noncoherent_dma() sub-condition from
>> the other half of the original commit b18d5431acc7.
>>
>> Cc: Paolo Bonzini 
>> Cc: Jordan Justen 
>> Cc: Janusz Mocek 
>> Cc: Alex Williamson 
>> Cc: Xiao Guangrong 
>> Signed-off-by: Laszlo Ersek 
>> ---
>>  arch/x86/kvm/x86.c | 4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index a24bae0..30723a4 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -625,7 +625,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>>  if ((cr0 ^ old_cr0) & update_bits)
>>  kvm_mmu_reset_context(vcpu);
>>  
>> -if ((cr0 ^ old_cr0) & X86_CR0_CD)
>> +if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
>> +kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
>> +!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
>>  kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
>>  
>>  return 0;
>>
>
> I had notes on this patch, but I forgot to format it with --notes. They
> were:
>
> - People on the CC list, please reply with your Tested-by, Reported-by,
>   etc tags as appropriate; it's getting blurry who participated in what
>   and how.
>
> - This patch is *not* necessary for the OVMF SMM work; instead it
>   addresses an independent OVMF boot regression seen by users.
>
> Thanks
> Laszlo
Tested-by: Janusz Mocek 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: PPC: Book3S HV: Synthesize segment fault if SLB lookup fails

2015-11-03 Thread David Gibson
On Tue, Oct 27, 2015 at 04:13:56PM +1100, Paul Mackerras wrote:
> When handling a hypervisor data or instruction storage interrupt (HDSI
> or HISI), we look up the SLB entry for the address being accessed in
> order to translate the effective address to a virtual address which can
> be looked up in the guest HPT.  This lookup can occasionally fail due
> to the guest replacing an SLB entry without invalidating the evicted
> SLB entry.  In this situation an ERAT (effective to real address
> translation cache) entry can persist and be used by the hardware even
> though there is no longer a corresponding SLB entry.
> 
> Previously we would just deliver a data or instruction storage interrupt
> (DSI or ISI) to the guest in this case.  However, this is not correct
> and has been observed to cause guests to crash, typically with a
> data storage protection interrupt on a store to the vmemmap area.
> 
> Instead, what we do now is to synthesize a data or instruction segment
> interrupt.  That should cause the guest to reload an appropriate entry
> into the SLB and retry the faulting instruction.  If it still faults,
> we should find an appropriate SLB entry next time and be able to handle
> the fault.
> 
> Signed-off-by: Paul Mackerras 

Reviewed-by: David Gibson 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH] KVM: PPC: Book3S HV: Synthesize segment fault if SLB lookup fails

2015-11-03 Thread David Gibson
On Tue, Oct 27, 2015 at 04:13:56PM +1100, Paul Mackerras wrote:
> When handling a hypervisor data or instruction storage interrupt (HDSI
> or HISI), we look up the SLB entry for the address being accessed in
> order to translate the effective address to a virtual address which can
> be looked up in the guest HPT.  This lookup can occasionally fail due
> to the guest replacing an SLB entry without invalidating the evicted
> SLB entry.  In this situation an ERAT (effective to real address
> translation cache) entry can persist and be used by the hardware even
> though there is no longer a corresponding SLB entry.
> 
> Previously we would just deliver a data or instruction storage interrupt
> (DSI or ISI) to the guest in this case.  However, this is not correct
> and has been observed to cause guests to crash, typically with a
> data storage protection interrupt on a store to the vmemmap area.
> 
> Instead, what we do now is to synthesize a data or instruction segment
> interrupt.  That should cause the guest to reload an appropriate entry
> into the SLB and retry the faulting instruction.  If it still faults,
> we should find an appropriate SLB entry next time and be able to handle
> the fault.
> 
> Signed-off-by: Paul Mackerras 

Reviewed-by: David Gibson 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH v7 08/35] exec: allow memory to be allocated from any kind of path

2015-11-03 Thread Xiao Guangrong



On 11/04/2015 07:00 AM, Eduardo Habkost wrote:

On Mon, Nov 02, 2015 at 05:13:10PM +0800, Xiao Guangrong wrote:

Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
locates at DAX enabled filesystem

So this patch let it work on any kind of path

Signed-off-by: Xiao Guangrong 
---
  exec.c | 24 
  1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/exec.c b/exec.c
index 9de38be..9075f4d 100644
--- a/exec.c
+++ b/exec.c
@@ -1184,25 +1184,25 @@ static void *file_ram_alloc(RAMBlock *block,
  char *c;
  void *area;
  int fd;
-uint64_t hpagesize;
+uint64_t pagesize;
  Error *local_err = NULL;

-hpagesize = qemu_file_get_page_size(path, &local_err);
+pagesize = qemu_file_get_page_size(path, &local_err);
  if (local_err) {
  error_propagate(errp, local_err);
  goto error;
  }

-if (hpagesize == getpagesize()) {
-fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
+if (pagesize == getpagesize()) {
+fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");


If the point of this patch is to allow file_ram_alloc() to not be
specific to hugetlbfs anymore, this warning can simply go away.

(And in case if you really want to keep the warning, I don't see the
point of the changes you made to it.)



This is the history why we did it like this:
https://lists.gnu.org/archive/html/qemu-devel/2015-10/msg02862.html

Q:
| What this *actually* is trying to warn against is that
| mapping a regular file (as opposed to hugetlbfs)
| means transparent huge pages don't work.

| So I don't think we should drop this warning completely.
| Either let's add the nvdimm magic, or simply check the
| page size.

A:
| Check the page size sounds good, will check:
| if (pagesize != getpagesize()) {
|...print something...
|}

| I agree with you that showing the info is needed, however,
| 'Warning' might scare some users, how about drop this word or
| just show “Memory is not allocated from HugeTlbfs”?
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 11/35] util: introduce qemu_file_getlength()

2015-11-03 Thread Xiao Guangrong



On 11/04/2015 07:21 AM, Eduardo Habkost wrote:

On Mon, Nov 02, 2015 at 05:13:13PM +0800, Xiao Guangrong wrote:
[...]

+size_t qemu_file_getlength(const char *file, Error **errp)
+{
+int64_t size;

[...]

+return size;


Can you guarantee that SIZE_MAX >= INT64_MAX on all platforms supported
by QEMU?



Actually, this function is abstracted from the common function, raw_getlength(),
in raw-posix.c whose return value is int64_t.

And I think int64_t is large enough for block devices.



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2] KVM: VMX: Fix commit which broke PML

2015-11-03 Thread Kai Huang
I found PML was broken since below commit:

commit feda805fe7c4ed9cf78158e73b1218752e3b4314
Author: Xiao Guangrong 
Date:   Wed Sep 9 14:05:55 2015 +0800

KVM: VMX: unify SECONDARY_VM_EXEC_CONTROL update

Unify the update in vmx_cpuid_update()

Signed-off-by: Xiao Guangrong 
[Rewrite to use vmcs_set_secondary_exec_control. - Paolo]
Signed-off-by: Paolo Bonzini 

The reason is in above commit vmx_cpuid_update calls vmx_secondary_exec_control,
in which currently SECONDARY_EXEC_ENABLE_PML bit is cleared unconditionally (as
PML is enabled in creating vcpu). Therefore if vcpu_cpuid_update is called after
vcpu is created, PML will be disabled unexpectedly while log-dirty code still
thinks PML is used.

Fix this by clearing SECONDARY_EXEC_ENABLE_PML in vmx_secondary_exec_control
only when PML is not supported or not enabled (!enable_pml). This is more
reasonable as PML is currently either always enabled or disabled. With this
explicit updating SECONDARY_EXEC_ENABLE_PML in vmx_enable{disable}_pml is not
needed so also rename vmx_enable{disable}_pml to vmx_create{destroy}_pml_buffer.

Signed-off-by: Kai Huang 

---

v1->v2: Fix this by following Paolo's suggestion. It's better not to clear
SECONDARY_EXEC_ENABLE_PML in vmx_secondary_exec_control unconditionally but only
clear it when PML is not supported or not enabled.

---
 arch/x86/kvm/vmx.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2ac11641..89f4fa2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4718,8 +4718,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx 
*vmx)
   a current VMCS12
*/
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
-   /* PML is enabled/disabled in creating/destorying vcpu */
-   exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
+
+   if (!enable_pml)
+   exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
/* Currently, we allow L1 guest to directly run pcommit instruction. */
exec_control &= ~SECONDARY_EXEC_PCOMMIT;
@@ -7804,7 +7805,7 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 
*info1, u64 *info2)
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
-static int vmx_enable_pml(struct vcpu_vmx *vmx)
+static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
 {
struct page *pml_pg;
 
@@ -7817,12 +7818,10 @@ static int vmx_enable_pml(struct vcpu_vmx *vmx)
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
 
-   vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_ENABLE_PML);
-
return 0;
 }
 
-static void vmx_disable_pml(struct vcpu_vmx *vmx)
+static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
 {
ASSERT(vmx->pml_pg);
__free_page(vmx->pml_pg);
@@ -8706,7 +8705,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
 
if (enable_pml)
-   vmx_disable_pml(vmx);
+   vmx_destroy_pml_buffer(vmx);
free_vpid(vmx->vpid);
leave_guest_mode(vcpu);
vmx_load_vmcs01(vcpu);
@@ -8790,7 +8789,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
 * for the guest, etc.
 */
if (enable_pml) {
-   err = vmx_enable_pml(vmx);
+   err = vmx_create_pml_buffer(vmx);
if (err)
goto free_vmcs;
}
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0()

2015-11-03 Thread Xiao Guangrong



On 11/04/2015 02:34 AM, Laszlo Ersek wrote:

Commit b18d5431acc7 ("KVM: x86: fix CR0.CD virtualization") was
technically correct, but it broke OVMF guests by slowing down various
parts of the firmware.

Commit fb279950ba02 ("KVM: vmx: obey KVM_QUIRK_CD_NW_CLEARED") quirked the
first function modified by b18d5431acc7, vmx_get_mt_mask(), for OVMF's
sake. This restored the speed of the OVMF code that runs before
PlatformPei (including the memory intensive LZMA decompression in SEC).

This patch extends the quirk to the second function modified by
b18d5431acc7, kvm_set_cr0(). It eliminates the intrusive slowdown that
hits the EFI_MP_SERVICES_PROTOCOL implementation of edk2's
UefiCpuPkg/CpuDxe -- which is built into OVMF --, when CpuDxe starts up
all APs at once for initialization, in order to count them.

We also carry over the kvm_arch_has_noncoherent_dma() sub-condition from
the other half of the original commit b18d5431acc7.


Reviewed-by: Xiao Guangrong 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: VMX: Fix commit which broke PML

2015-11-03 Thread Kai Huang



On 11/03/2015 05:59 PM, Paolo Bonzini wrote:


On 03/11/2015 06:49, Kai Huang wrote:

I found PML was broken since below commit:

commit feda805fe7c4ed9cf78158e73b1218752e3b4314
Author: Xiao Guangrong 
Date:   Wed Sep 9 14:05:55 2015 +0800

KVM: VMX: unify SECONDARY_VM_EXEC_CONTROL update

Unify the update in vmx_cpuid_update()

Signed-off-by: Xiao Guangrong 
[Rewrite to use vmcs_set_secondary_exec_control. - Paolo]
Signed-off-by: Paolo Bonzini 

The reason is that after the above commit vmx_cpuid_update calls
vmx_secondary_exec_control, in which PML is disabled unconditionally, as PML is
enabled in creating vcpu. Therefore if vcpu_cpuid_update is called after vcpu is
created, PML will be disabled unexpectedly while log-dirty code still thinks PML
is used. Actually, it looks like calling vmx_secondary_exec_control in
vmx_cpuid_update is likely to break any VMX feature that is enabled/disabled on
demand by updating SECONDARY_VM_EXEC_CONTROL, if vmx_cpuid_update is called
between the feature being enabled and disabled.

Fix this by calling vmcs_read32 to read out SECONDARY_VM_EXEC_CONTROL directly.

vmx_cpuid_update() is meant to be mostly idempotent; the parts that
depend on the current VMCS configuration are hidden in
vmcs_set_secondary_control.  So a better fix would be to add
SECONDARY_EXEC_ENABLE_PML to vmcs_set_secondary_exec_control's
"mask" variable.  However, you can see from the comment:

/*
 * These bits in the secondary execution controls field
 * are dynamic, the others are mostly based on the hypervisor
 * architecture and the guest's CPUID. Do not touch the
 * dynamic bits.
 */

that even this is not the optimal fix.  SECONDARY_EXEC_ENABLE_PML is
either always set or always clear, so it shouldn't be in "mask".

Instead, it should be in vmcs_config.cpu_based_2nd_exec_ctrl.  It isn't
because my review didn't notice this remnant of your original
implementation, which dynamically enabled/disabled PML.

In fact, cpu_has_vmx_pml() expects SECONDARY_EXEC_ENABLE_PML to be set
in vmcs_config.cpu_based_2nd_exec_ctrl, so it is a bit confusing to
remove the bit unconditionally in vmx_secondary_exec_control!

So I think SECONDARY_EXEC_ENABLE_PML should not be removed unconditionally
from exec_control in vmx_secondary_exec_control; the removal should be
conditional on !enable_pml, like we do for e.g. EPT or VPID.  If you do this,
vmx_enable_pml and vmx_disable_pml need not touch SECONDARY_VM_EXEC_CONTROL
anymore.  Do you agree?  If so, can you prepare a patch along these lines?

Thanks Paolo for your comments.

Sure I agree. I will send out the v2 patch by following what you suggested.



(Since you are at it, perhaps you can rename vmx_enable_pml and
vmx_disable_pml, since they will only allocate and free the PML page).
I intend to rename vmx_enable{disable}_pml to 
vmx_create{destroy}_pml_buffer, as besides allocating buffer, we also 
need to write buffer address and PML index to VMCS.


Thanks,
-Kai


Thanks for reporting the issue!

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RESEND PATCH v2] KVM: VMX: Fix commit which broke PML

2015-11-03 Thread Kai Huang
I found PML was broken since below commit:

commit feda805fe7c4ed9cf78158e73b1218752e3b4314
Author: Xiao Guangrong 
Date:   Wed Sep 9 14:05:55 2015 +0800

KVM: VMX: unify SECONDARY_VM_EXEC_CONTROL update

Unify the update in vmx_cpuid_update()

Signed-off-by: Xiao Guangrong 
[Rewrite to use vmcs_set_secondary_exec_control. - Paolo]
Signed-off-by: Paolo Bonzini 

The reason is in above commit vmx_cpuid_update calls vmx_secondary_exec_control,
in which currently SECONDARY_EXEC_ENABLE_PML bit is cleared unconditionally (as
PML is enabled in creating vcpu). Therefore if vcpu_cpuid_update is called after
vcpu is created, PML will be disabled unexpectedly while log-dirty code still
thinks PML is used.

Fix this by clearing SECONDARY_EXEC_ENABLE_PML in vmx_secondary_exec_control
only when PML is not supported or not enabled (!enable_pml). This is more
reasonable as PML is currently either always enabled or disabled. With this
explicit updating SECONDARY_EXEC_ENABLE_PML in vmx_enable{disable}_pml is not
needed so also rename vmx_enable{disable}_pml to vmx_create{destroy}_pml_buffer.

Signed-off-by: Kai Huang 
---

Sorry previous patch missed PATCH subject prefix. Resend by fixing that.

v1->v2: Fix this by following Paolo's suggestion. It's better not to clear
SECONDARY_EXEC_ENABLE_PML in vmx_secondary_exec_control unconditionally, but to
clear it only when PML is not supported or not enabled.

---
 arch/x86/kvm/vmx.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2ac11641..89f4fa2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4718,8 +4718,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx 
*vmx)
   a current VMCS12
*/
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
-   /* PML is enabled/disabled in creating/destorying vcpu */
-   exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
+
+   if (!enable_pml)
+   exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
/* Currently, we allow L1 guest to directly run pcommit instruction. */
exec_control &= ~SECONDARY_EXEC_PCOMMIT;
@@ -7804,7 +7805,7 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 
*info1, u64 *info2)
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
-static int vmx_enable_pml(struct vcpu_vmx *vmx)
+static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
 {
struct page *pml_pg;
 
@@ -7817,12 +7818,10 @@ static int vmx_enable_pml(struct vcpu_vmx *vmx)
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
 
-   vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_ENABLE_PML);
-
return 0;
 }
 
-static void vmx_disable_pml(struct vcpu_vmx *vmx)
+static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
 {
ASSERT(vmx->pml_pg);
__free_page(vmx->pml_pg);
@@ -8706,7 +8705,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
 
if (enable_pml)
-   vmx_disable_pml(vmx);
+   vmx_destroy_pml_buffer(vmx);
free_vpid(vmx->vpid);
leave_guest_mode(vcpu);
vmx_load_vmcs01(vcpu);
@@ -8790,7 +8789,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
 * for the guest, etc.
 */
if (enable_pml) {
-   err = vmx_enable_pml(vmx);
+   err = vmx_create_pml_buffer(vmx);
if (err)
goto free_vmcs;
}
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 20/35] dimm: get mapped memory region from DIMMDeviceClass->get_memory_region

2015-11-03 Thread Xiao Guangrong



On 11/03/2015 12:16 AM, Vladimir Sementsov-Ogievskiy wrote:

On 02.11.2015 18:06, Xiao Guangrong wrote:



On 11/02/2015 10:26 PM, Vladimir Sementsov-Ogievskiy wrote:

On 02.11.2015 16:08, Xiao Guangrong wrote:



On 11/02/2015 08:19 PM, Vladimir Sementsov-Ogievskiy wrote:

On 02.11.2015 12:13, Xiao Guangrong wrote:

Currently, the memory region of backed memory is directly mapped to
guest's address space, however, it is not true for nvdimm device

This patch let dimm device realize this fact and use
DIMMDeviceClass->get_memory_region method to get the mapped memory
region

Current code did not check the return value of get_memory_region as it
assumed the backend memory of pc-dimm is always properly initialized,
we make get_memory_region internally catch the case if something is
wrong


but here you call not pc-dimm's get_memory_region, but common 
ddc->get_memory_region, which may be
nvdimm or possibly other future dimm, so, why not check it here? And then 
pc_dimm_get_memory_region
may be left untouched (error_abort is ok, because errp is unused).


Hmm, because 'here' is not the only place calling ->get_memory_region, this 
method has
multiple callers:

$ git grep "\->get_memory_region"
hw/i386/pc.c:MemoryRegion *mr = ddc->get_memory_region(dimm);
hw/i386/pc.c:MemoryRegion *mr = ddc->get_memory_region(dimm);
hw/mem/dimm.c:mr = ddc->get_memory_region(dimm);
hw/mem/nvdimm.c:ddc->get_memory_region = nvdimm_get_memory_region;
hw/mem/pc-dimm.c:ddc->get_memory_region = pc_dimm_get_memory_region;
hw/ppc/spapr.c:MemoryRegion *mr = ddc->get_memory_region(dimm);

memory region validation is also done for NVDIMM in nvdimm device.


Ok, then it should be documented by a comment in dimm.h, where DIMMDeviceClass 
is defined, that this
function should not fail



Okay, how about this comment:

/*
 * get the memory region which will be mapped into guest's address
 * space. It is called after the dimm device is realized, so it never
 * fails.
 */
MemoryRegion *(*get_memory_region)(DIMMDevice *dimm);
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvm-unit-tests PATCH 00/14] ppc64: initial drop

2015-11-03 Thread Andrew Jones
On Tue, Nov 03, 2015 at 10:40:18AM +0100, Paolo Bonzini wrote:
> 
> 
> On 03/11/2015 08:08, Thomas Huth wrote:
> > On 03/08/15 16:41, Andrew Jones wrote:
> >> > This series is the first series of a series of series that will
> >> > bring support to kvm-unit-tests for ppc64, and eventually ppc64le.
> >  Hi Andrew,
> > 
> > may I ask about the current state of ppc64 support in the
> > kvm-unit-tests? Is there a newer version available than the one you
> > posted three months ago?
> 

Hi Thomas,

I haven't gotten around to preparing the v2 yet :-(   I do have it on
my TODO list, and I'm looking forward to working on it. Now that I know
you're looking for it, I'll try to bump it up in priority. Thanks for
the interest!

> I've been a slob with all the kvm-unit-tests patches.  Andrew, can you
> send a single submission of all the patches, so that I can review them
> and apply them?

Hi Paolo,

I've got several patches on my staging branch that I believe are ready.
I plan to send those as a big "pull" series for your review soon.

Thanks,
drew

> 
> Paolo
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvm-unit-tests PATCH 00/14] ppc64: initial drop

2015-11-03 Thread Andrew Jones
On Tue, Nov 03, 2015 at 10:40:18AM +0100, Paolo Bonzini wrote:
> 
> 
> On 03/11/2015 08:08, Thomas Huth wrote:
> > On 03/08/15 16:41, Andrew Jones wrote:
> >> > This series is the first series of a series of series that will
> >> > bring support to kvm-unit-tests for ppc64, and eventually ppc64le.
> >  Hi Andrew,
> > 
> > may I ask about the current state of ppc64 support in the
> > kvm-unit-tests? Is there a newer version available than the one you
> > posted three months ago?
> 

Hi Thomas,

I haven't gotten around to preparing the v2 yet :-(   I do have it on
my TODO list, and I'm looking forward to working on it. Now that I know
you're looking for it, I'll try to bump it up in priority. Thanks for
the interest!

> I've been a slob with all the kvm-unit-tests patches.  Andrew, can you
> send a single submission of all the patches, so that I can review them
> and apply them?

Hi Paolo,

I've got several patches on my staging branch that I believe are ready.
I plan to send those as a big "pull" series for your review soon.

Thanks,
drew

> 
> Paolo
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 13/35] hostmem-file: use whole file size if possible

2015-11-03 Thread Xiao Guangrong



On 11/03/2015 01:09 AM, Vladimir Sementsov-Ogievskiy wrote:

On 02.11.2015 12:13, Xiao Guangrong wrote:

Use the whole file size if @size is not specified which is useful
if we want to directly pass a file to guest

Signed-off-by: Xiao Guangrong 
---
  backends/hostmem-file.c | 22 ++
  1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index 9097a57..ea355c1 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -38,15 +38,29 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error 
**errp)
  {
  HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(backend);
-if (!backend->size) {
-error_setg(errp, "can't create backend with size 0");
-return;
-}
  if (!fb->mem_path) {
  error_setg(errp, "mem-path property not set");
  return;
  }
+if (!backend->size) {
+Error *local_err = NULL;
+
+/*
+ * use the whole file size if @size is not specified.
+ */
+backend->size = qemu_file_getlength(fb->mem_path, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+
+if (!backend->size) {
+error_setg(errp, "can't create backend on the file whose size is 0");
+return;
+}
+
  backend->force_prealloc = mem_prealloc;
  memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
   object_get_canonical_path(OBJECT(backend)),


why not just


It looks like it is a common style used in the whole QEMU code.



+if (!backend->size) {
+/*
+ * use the whole file size if @size is not specified.
+ */
+backend->size = qemu_file_getlength(fb->mem_path, errp);
+if (*errp) {
+return;
+}
+}




But i think your way is better. :)


what is the purpose of propagating?


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: allow RSM from 64-bit mode

2015-11-03 Thread Paolo Bonzini


On 03/11/2015 14:40, Laszlo Ersek wrote:
> On 11/03/15 14:29, Paolo Bonzini wrote:
>> The SDM says that exiting system management mode from 64-bit mode
>> is invalid, but that would be too good to be true.  But actually,
>> most of the code is already there to support exiting from compat
>> mode (EFER.LME=1, EFER.LMA=0).  Getting all the way from 64-bit
>> mode to real mode only requires clearing CS.L and CR4.PCIDE.
>>
>> Cc: sta...@vger.kernel.org
>> Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c
>> Cc: Laszlo Ersek 
>> Cc: Radim Krčmář 
>> Signed-off-by: Paolo Bonzini 
>> ---
>>  arch/x86/kvm/emulate.c | 30 +-
>>  1 file changed, 25 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
>> index b60fed56671b..1505587d06e9 100644
>> --- a/arch/x86/kvm/emulate.c
>> +++ b/arch/x86/kvm/emulate.c
>> @@ -2484,16 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
>>  
>>  /*
>>   * Get back to real mode, to prepare a safe state in which to load
>> - * CR0/CR3/CR4/EFER.
>> - *
>> - * CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
>> + * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
>> + * supports long mode.
>>   */
>> +cr4 = ctxt->ops->get_cr(ctxt, 4);
>> +if (emulator_has_longmode(ctxt)) {
>> +struct desc_struct cs_desc;
>> +
>> +/* Zero CR4.PCIDE before CR0.PG.  */
>> +if (cr4 & X86_CR4_PCIDE) {
>> +ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
>> +cr4 &= ~X86_CR4_PCIDE;
>> +}
>> +
>> +/* A 32-bit code segment is required to clear EFER.LMA.  */
>> +memset(&cs_desc, 0, sizeof(cs_desc));
>> +cs_desc.type = 0xb;
>> +cs_desc.s = cs_desc.g = cs_desc.p = 1;
>> +ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
>> +}
>> +
>> +/* For the 64-bit case, this will clear EFER.LMA.  */
>>  cr0 = ctxt->ops->get_cr(ctxt, 0);
>>  if (cr0 & X86_CR0_PE)
>>  ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
>> -cr4 = ctxt->ops->get_cr(ctxt, 4);
>> +
>> +/* Now clear CR4.PAE (which must be done before clearing EFER.LME).  */
>>  if (cr4 & X86_CR4_PAE)
>>  ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
>> +
>> +/* And finally go back to 32-bit mode.  */
>>  efer = 0;
>>  ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
>>  
>> @@ -4454,7 +4474,7 @@ static const struct opcode twobyte_table[256] = {
>>  F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
>>  /* 0xA8 - 0xAF */
>>  I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
>> -II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm),
>> +II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
>>  F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
>>  F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
>>  F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
>>
> 
> What branch should I test this on top of?

Just use whatever you were using before, and revert commit c9db607
("UefiCpuPkg: PiSmmCpuDxeSmm: do not execute RSM from 64-bit mode",
2015-10-14) from your OVMF branch.  This is how I tested it, in fact.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: allow RSM from 64-bit mode

2015-11-03 Thread Laszlo Ersek
On 11/03/15 14:46, Paolo Bonzini wrote:
> 
> 
> On 03/11/2015 14:40, Laszlo Ersek wrote:
>> On 11/03/15 14:29, Paolo Bonzini wrote:
>>> The SDM says that exiting system management mode from 64-bit mode
>>> is invalid, but that would be too good to be true.  But actually,
>>> most of the code is already there to support exiting from compat
>>> mode (EFER.LME=1, EFER.LMA=0).  Getting all the way from 64-bit
>>> mode to real mode only requires clearing CS.L and CR4.PCIDE.
>>>
>>> Cc: sta...@vger.kernel.org
>>> Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c
>>> Cc: Laszlo Ersek 
>>> Cc: Radim Krčmář 
>>> Signed-off-by: Paolo Bonzini 
>>> ---
>>>  arch/x86/kvm/emulate.c | 30 +-
>>>  1 file changed, 25 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
>>> index b60fed56671b..1505587d06e9 100644
>>> --- a/arch/x86/kvm/emulate.c
>>> +++ b/arch/x86/kvm/emulate.c
>>> @@ -2484,16 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
>>>  
>>> /*
>>>  * Get back to real mode, to prepare a safe state in which to load
>>> -* CR0/CR3/CR4/EFER.
>>> -*
>>> -* CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
>>> +* CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
>>> +* supports long mode.
>>>  */
>>> +   cr4 = ctxt->ops->get_cr(ctxt, 4);
>>> +   if (emulator_has_longmode(ctxt)) {
>>> +   struct desc_struct cs_desc;
>>> +
>>> +   /* Zero CR4.PCIDE before CR0.PG.  */
>>> +   if (cr4 & X86_CR4_PCIDE) {
>>> +   ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
>>> +   cr4 &= ~X86_CR4_PCIDE;
>>> +   }
>>> +
>>> +   /* A 32-bit code segment is required to clear EFER.LMA.  */
>>> +   memset(&cs_desc, 0, sizeof(cs_desc));
>>> +   cs_desc.type = 0xb;
>>> +   cs_desc.s = cs_desc.g = cs_desc.p = 1;
>>> +   ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
>>> +   }
>>> +
>>> +   /* For the 64-bit case, this will clear EFER.LMA.  */
>>> cr0 = ctxt->ops->get_cr(ctxt, 0);
>>> if (cr0 & X86_CR0_PE)
>>> ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
>>> -   cr4 = ctxt->ops->get_cr(ctxt, 4);
>>> +
>>> +   /* Now clear CR4.PAE (which must be done before clearing EFER.LME).  */
>>> if (cr4 & X86_CR4_PAE)
>>> ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
>>> +
>>> +   /* And finally go back to 32-bit mode.  */
>>> efer = 0;
>>> ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
>>>  
>>> @@ -4454,7 +4474,7 @@ static const struct opcode twobyte_table[256] = {
>>> F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
>>> /* 0xA8 - 0xAF */
>>> I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
>>> -   II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm),
>>> +   II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
>>> F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
>>> F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
>>> F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
>>>
>>
>> What branch should I test this on top of?
> 
> Just use whatever you were using before, and revert commit c9db607
> ("UefiCpuPkg: PiSmmCpuDxeSmm: do not execute RSM from 64-bit mode",
> 2015-10-14) from your OVMF branch.

Right, I planned to do that OVMF-side revert; I just wasn't sure if e.g.
kvm/queue had some prerequisite patches for this.

> This is how I tested it, in fact.

I'll try to report back soon.

Thanks!
Laszlo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH v7 03/35] acpi: add aml_create_field

2015-11-03 Thread Xiao Guangrong



On 11/03/2015 02:14 PM, Shannon Zhao wrote:



On 2015/11/2 17:13, Xiao Guangrong wrote:

Implement CreateField term which is used by NVDIMM _DSM method in later patch

Signed-off-by: Xiao Guangrong 
---
  hw/acpi/aml-build.c | 13 +
  include/hw/acpi/aml-build.h |  1 +
  2 files changed, 14 insertions(+)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index a72214d..9fe5e7b 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1151,6 +1151,19 @@ Aml *aml_sizeof(Aml *arg)
  return var;
  }

+/* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefCreateField */
+Aml *aml_create_field(Aml *srcbuf, Aml *index, Aml *len, const char *name)
+{
+Aml *var = aml_alloc();
+build_append_byte(var->buf, 0x5B); /* ExtOpPrefix */
+build_append_byte(var->buf, 0x13); /* CreateFieldOp */
+aml_append(var, srcbuf);
+aml_append(var, index);
+aml_append(var, len);
+build_append_namestring(var->buf, "%s", name);
+return var;
+}
+
  void
  build_header(GArray *linker, GArray *table_data,
   AcpiTableHeader *h, const char *sig, int len, uint8_t rev)
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 7296efb..7e1c43b 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -276,6 +276,7 @@ Aml *aml_touuid(const char *uuid);
  Aml *aml_unicode(const char *str);
  Aml *aml_derefof(Aml *arg);
  Aml *aml_sizeof(Aml *arg);
+Aml *aml_create_field(Aml *srcbuf, Aml *index, Aml *len, const char *name);


Maybe this could be moved together with existing aml_create_dword_field.


Not bad, will do. :)




  void
  build_header(GArray *linker, GArray *table_data,




--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 9/9] kvm/x86: Hyper-V kvm exit

2015-11-03 Thread Paolo Bonzini


On 03/11/2015 15:36, Andrey Smetanin wrote:
>>
>>
>> if I run a patched QEMU but I *do not* enable the synthetic interrupt
>> controller.  I can fix it by wrapping the calls to synic_exit with "if
>> (!host)", but I haven't checked yet the source---so that may not be the
>> proper fix.  Sorry for not having looked more in detail.
>>
> Could you please specify test case(kvm unit tests ?) and kernel/qemu(if
> it's not standard)?

It happens just by starting QEMU.

Kernel: kvm/queue
+ kvm/irqchip: kvm_arch_irq_routing_update renaming split
+ kvm/x86: split ioapic-handled and EOI exit bitmaps
+ kvm/x86: Hyper-V synthetic interrupt controller
+ kvm/x86: Hyper-V kvm exit

QEMU: 3a958f559ecd
+ standard-headers/x86: add Hyper-V SynIC constants
+ target-i386/kvm: Hyper-V SynIC MSR's support
+ linux-headers/kvm: add Hyper-V SynIC irq routing type and struct
+ kvm: Hyper-V SynIC irq routing support
+ linux-headers/kvm: KVM_EXIT_HYPERV type and struct
+ target-i386/hyperv: Hyper-V SynIC SINT routing and vCPU exit
+ hw/misc: Hyper-V test device 'hyperv-testdev'

Can be reproduced just with
"../qemu/+build/x86_64-softmmu/qemu-system-x86_64 --enable-kvm -cpu
kvm64 -display none".

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 09/35] exec: allow file_ram_alloc to work on file

2015-11-03 Thread Xiao Guangrong



On 11/03/2015 08:34 PM, Igor Mammedov wrote:

On Mon,  2 Nov 2015 17:13:11 +0800
Xiao Guangrong  wrote:


Currently, file_ram_alloc() only works on a directory - it creates a file
under @path and does mmap on it

This patch tries to allow it to work on a file directly: if @path is a
directory it works as before, otherwise it treats @path as the target
file and directly allocates memory from it

Paolo has just queued
https://lists.gnu.org/archive/html/qemu-devel/2015-10/msg06513.html
perhaps that's what you can reuse here.


Yep, Paolo has told me about that, i will update this patchset after his
pull request.

BTW, which tree should this patchset be based on in future development?
Paolo's or Michael's or even upstream qemu tree?

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: allow RSM from 64-bit mode

2015-11-03 Thread Laszlo Ersek
On 11/03/15 14:29, Paolo Bonzini wrote:
> The SDM says that exiting system management mode from 64-bit mode
> is invalid, but that would be too good to be true.  But actually,
> most of the code is already there to support exiting from compat
> mode (EFER.LME=1, EFER.LMA=0).  Getting all the way from 64-bit
> mode to real mode only requires clearing CS.L and CR4.PCIDE.
> 
> Cc: sta...@vger.kernel.org
> Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c
> Cc: Laszlo Ersek 
> Cc: Radim Krčmář 
> Signed-off-by: Paolo Bonzini 
> ---
>  arch/x86/kvm/emulate.c | 30 +-
>  1 file changed, 25 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index b60fed56671b..1505587d06e9 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -2484,16 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
>  
>   /*
>* Get back to real mode, to prepare a safe state in which to load
> -  * CR0/CR3/CR4/EFER.
> -  *
> -  * CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
> +  * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
> +  * supports long mode.
>*/
> + cr4 = ctxt->ops->get_cr(ctxt, 4);
> + if (emulator_has_longmode(ctxt)) {
> + struct desc_struct cs_desc;
> +
> + /* Zero CR4.PCIDE before CR0.PG.  */
> + if (cr4 & X86_CR4_PCIDE) {
> + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
> + cr4 &= ~X86_CR4_PCIDE;
> + }
> +
> + /* A 32-bit code segment is required to clear EFER.LMA.  */
> + memset(&cs_desc, 0, sizeof(cs_desc));
> + cs_desc.type = 0xb;
> + cs_desc.s = cs_desc.g = cs_desc.p = 1;
> + ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
> + }
> +
> + /* For the 64-bit case, this will clear EFER.LMA.  */
>   cr0 = ctxt->ops->get_cr(ctxt, 0);
>   if (cr0 & X86_CR0_PE)
>   ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
> - cr4 = ctxt->ops->get_cr(ctxt, 4);
> +
> + /* Now clear CR4.PAE (which must be done before clearing EFER.LME).  */
>   if (cr4 & X86_CR4_PAE)
>   ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
> +
> + /* And finally go back to 32-bit mode.  */
>   efer = 0;
>   ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
>  
> @@ -4454,7 +4474,7 @@ static const struct opcode twobyte_table[256] = {
>   F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
>   /* 0xA8 - 0xAF */
>   I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
> - II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm),
> + II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
>   F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
>   F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
>   F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
> 

What branch should I test this on top of?

Thank you!
Laszlo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: allow RSM from 64-bit mode

2015-11-03 Thread Paolo Bonzini


On 03/11/2015 15:02, Laszlo Ersek wrote:
> On 11/03/15 14:46, Paolo Bonzini wrote:
>>
>>
>> On 03/11/2015 14:40, Laszlo Ersek wrote:
>>> On 11/03/15 14:29, Paolo Bonzini wrote:
 The SDM says that exiting system management mode from 64-bit mode
 is invalid, but that would be too good to be true.  But actually,
 most of the code is already there to support exiting from compat
 mode (EFER.LME=1, EFER.LMA=0).  Getting all the way from 64-bit
 mode to real mode only requires clearing CS.L and CR4.PCIDE.

 Cc: sta...@vger.kernel.org
 Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c
 Cc: Laszlo Ersek 
 Cc: Radim Krčmář 
 Signed-off-by: Paolo Bonzini 
 ---
  arch/x86/kvm/emulate.c | 30 +-
  1 file changed, 25 insertions(+), 5 deletions(-)

 diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
 index b60fed56671b..1505587d06e9 100644
 --- a/arch/x86/kvm/emulate.c
 +++ b/arch/x86/kvm/emulate.c
 @@ -2484,16 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
  
/*
 * Get back to real mode, to prepare a safe state in which to load
 -   * CR0/CR3/CR4/EFER.
 -   *
 -   * CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
 +   * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
 +   * supports long mode.
 */
 +  cr4 = ctxt->ops->get_cr(ctxt, 4);
 +  if (emulator_has_longmode(ctxt)) {
 +  struct desc_struct cs_desc;
 +
 +  /* Zero CR4.PCIDE before CR0.PG.  */
 +  if (cr4 & X86_CR4_PCIDE) {
 +  ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
 +  cr4 &= ~X86_CR4_PCIDE;
 +  }
 +
 +  /* A 32-bit code segment is required to clear EFER.LMA.  */
 +  memset(&cs_desc, 0, sizeof(cs_desc));
 +  cs_desc.type = 0xb;
 +  cs_desc.s = cs_desc.g = cs_desc.p = 1;
 +  ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
 +  }
 +
 +  /* For the 64-bit case, this will clear EFER.LMA.  */
cr0 = ctxt->ops->get_cr(ctxt, 0);
if (cr0 & X86_CR0_PE)
ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
 -  cr4 = ctxt->ops->get_cr(ctxt, 4);
 +
 +  /* Now clear CR4.PAE (which must be done before clearing EFER.LME).  */
if (cr4 & X86_CR4_PAE)
ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
 +
 +  /* And finally go back to 32-bit mode.  */
efer = 0;
ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
  
 @@ -4454,7 +4474,7 @@ static const struct opcode twobyte_table[256] = {
F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
/* 0xA8 - 0xAF */
I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
 -  II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm),
 +  II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),

>>>
>>> What branch should I test this on top of?
>>
>> Just use whatever you were using before, and revert commit c9db607
>> ("UefiCpuPkg: PiSmmCpuDxeSmm: do not execute RSM from 64-bit mode",
>> 2015-10-14) from your OVMF branch.
> 
> Right, I planned to do that OVMF-side revert; I just wasn't sure if e.g.
> kvm/queue had some prerequisite patches for this.

Indeed, you can use either your "part 2" series or Radim's patches from
kvm/queue, it's the same.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH v7 06/35] acpi: add aml_method_serialized

2015-11-03 Thread Xiao Guangrong



On 11/03/2015 08:30 PM, Igor Mammedov wrote:

On Mon,  2 Nov 2015 17:13:08 +0800
Xiao Guangrong  wrote:


It avoids an explicit Mutex and will be used by NVDIMM ACPI

Signed-off-by: Xiao Guangrong 
---
  hw/acpi/aml-build.c | 26 --
  include/hw/acpi/aml-build.h |  1 +
  2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 9f792ab..8bee8b2 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -696,14 +696,36 @@ Aml *aml_while(Aml *predicate)
  }

  /* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefMethod */
-Aml *aml_method(const char *name, int arg_count)
+static Aml *__aml_method(const char *name, int arg_count, bool serialized)

We don't have many users of aml_method() yet, so I'd prefer to have a single
function rather than multiple functions:

I suggest doing something like:
typedef enum {
 AML_NONSERIALIZED = 0,
 AML_SERIALIZED = 1,
} AmlSerializeRule;

aml_method(const char *name, AmlSerializeRule rule, int synclevel);

with current users fixed up with AML_NONSERIALIZED argument.


Okay. It looks good to me, will follow it.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 09/35] exec: allow file_ram_alloc to work on file

2015-11-03 Thread Paolo Bonzini


On 03/11/2015 04:56, Xiao Guangrong wrote:
> 
> 
> On 11/03/2015 05:12 AM, Paolo Bonzini wrote:
>>
>>
>> On 02/11/2015 10:13, Xiao Guangrong wrote:
>>> Currently, file_ram_alloc() only works on a directory - it creates a file
>>> under @path and does mmap on it
>>>
>>> This patch tries to allow it to work on a file directly: if @path is a
>>> directory it works as before, otherwise it treats @path as the target
>>> file and then directly allocates memory from it
>>>
>>> Signed-off-by: Xiao Guangrong 
>>> ---
>>>   exec.c | 80
>>> ++
>>>   1 file changed, 51 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/exec.c b/exec.c
>>> index 9075f4d..db0fdaf 100644
>>> --- a/exec.c
>>> +++ b/exec.c
>>> @@ -1174,14 +1174,60 @@ void qemu_mutex_unlock_ramlist(void)
>>>   }
>>>
>>>   #ifdef __linux__
>>> +static bool path_is_dir(const char *path)
>>> +{
>>> +struct stat fs;
>>> +
>>> +return stat(path, ) == 0 && S_ISDIR(fs.st_mode);
>>> +}
>>> +
>>> +static int open_ram_file_path(RAMBlock *block, const char *path,
>>> size_t size)
>>> +{
>>> +char *filename;
>>> +char *sanitized_name;
>>> +char *c;
>>> +int fd;
>>> +
>>> +if (!path_is_dir(path)) {
>>> +int flags = (block->flags & RAM_SHARED) ? O_RDWR : O_RDONLY;
>>> +
>>> +flags |= O_EXCL;
>>> +return open(path, flags);
>>> +}
>>> +
>>> +/* Make name safe to use with mkstemp by replacing '/' with '_'. */
>>> +sanitized_name = g_strdup(memory_region_name(block->mr));
>>> +for (c = sanitized_name; *c != '\0'; c++) {
>>> +if (*c == '/') {
>>> +*c = '_';
>>> +}
>>> +}
>>> +filename = g_strdup_printf("%s/qemu_back_mem.%s.XX", path,
>>> +   sanitized_name);
>>> +g_free(sanitized_name);
>>> +fd = mkstemp(filename);
>>> +if (fd >= 0) {
>>> +unlink(filename);
>>> +/*
>>> + * ftruncate is not supported by hugetlbfs in older
>>> + * hosts, so don't bother bailing out on errors.
>>> + * If anything goes wrong with it under other filesystems,
>>> + * mmap will fail.
>>> + */
>>> +if (ftruncate(fd, size)) {
>>> +perror("ftruncate");
>>> +}
>>> +}
>>> +g_free(filename);
>>> +
>>> +return fd;
>>> +}
>>> +
>>>   static void *file_ram_alloc(RAMBlock *block,
>>>   ram_addr_t memory,
>>>   const char *path,
>>>   Error **errp)
>>>   {
>>> -char *filename;
>>> -char *sanitized_name;
>>> -char *c;
>>>   void *area;
>>>   int fd;
>>>   uint64_t pagesize;
>>> @@ -1212,38 +1258,14 @@ static void *file_ram_alloc(RAMBlock *block,
>>>   goto error;
>>>   }
>>>
>>> -/* Make name safe to use with mkstemp by replacing '/' with '_'. */
>>> -sanitized_name = g_strdup(memory_region_name(block->mr));
>>> -for (c = sanitized_name; *c != '\0'; c++) {
>>> -if (*c == '/')
>>> -*c = '_';
>>> -}
>>> -
>>> -filename = g_strdup_printf("%s/qemu_back_mem.%s.XX", path,
>>> -   sanitized_name);
>>> -g_free(sanitized_name);
>>> +memory = ROUND_UP(memory, pagesize);
>>>
>>> -fd = mkstemp(filename);
>>> +fd = open_ram_file_path(block, path, memory);
>>>   if (fd < 0) {
>>>   error_setg_errno(errp, errno,
>>>"unable to create backing store for path
>>> %s", path);
>>> -g_free(filename);
>>>   goto error;
>>>   }
>>> -unlink(filename);
>>> -g_free(filename);
>>> -
>>> -memory = ROUND_UP(memory, pagesize);
>>> -
>>> -/*
>>> - * ftruncate is not supported by hugetlbfs in older
>>> - * hosts, so don't bother bailing out on errors.
>>> - * If anything goes wrong with it under other filesystems,
>>> - * mmap will fail.
>>> - */
>>> -if (ftruncate(fd, memory)) {
>>> -perror("ftruncate");
>>> -}
>>>
>>>   area = qemu_ram_mmap(fd, memory, pagesize, block->flags &
>>> RAM_SHARED);
>>>   if (area == MAP_FAILED) {
>>>
>>
>> I was going to send tomorrow a pull request for a similar patch,
>> "backends/hostmem-file: Allow to specify full pathname for backing file".
>>
>> The main difference seems to be your usage of O_EXCL.  Can you explain
>> why you added it?
> 
> It's used if we pass a block device as an NVDIMM backend memory:
>  O_EXCL can be used without O_CREAT if pathname refers to a block
> device.  If the block device
>  is in use by the system (e.g., mounted), open() fails with the error EBUSY

That makes sense, but I think it's better to be consistent with the
handling of block devices.  Block devices do not use O_EXCL when QEMU
opens them; I guess in principle it would also be possible to share a
single pmem backend between multiple guests.

Paolo
--
To unsubscribe from this 

KVM call for 2015-11-10

2015-11-03 Thread Juan Quintela

Hi

Please, send any topic that you are interested in covering.

 Call details:

By popular demand, a google calendar public entry with it

  
https://www.google.com/calendar/embed?src=dG9iMXRqcXAzN3Y4ZXZwNzRoMHE4a3BqcXNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ

(Let me know if you have any problems with the calendar entry.  I just
gave up on getting CEST, CET, EDT and DST right at the same time).

If you need phone number details,  contact me privately

Thanks, Juan.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 27/35] nvdimm acpi: build ACPI nvdimm devices

2015-11-03 Thread Xiao Guangrong



On 11/03/2015 09:13 PM, Igor Mammedov wrote:

On Mon,  2 Nov 2015 17:13:29 +0800
Xiao Guangrong  wrote:


NVDIMM devices are defined in ACPI 6.0 9.20 NVDIMM Devices

There is a root device under \_SB and specified NVDIMM devices are under the
root device. Each NVDIMM device has _ADR which returns its handle used to
associate MEMDEV structure in NFIT

We reserve handle 0 for root device. In this patch, we save handle, handle,
arg1 and arg2 to dsm memory. Arg3 is conditionally saved in later patch

Signed-off-by: Xiao Guangrong 
---
  hw/acpi/nvdimm.c | 184 +++
  1 file changed, 184 insertions(+)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index dd84e5f..53ed675 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -368,6 +368,15 @@ static void nvdimm_build_nfit(GSList *device_list, GArray 
*table_offsets,
  g_array_free(structures, true);
  }

+struct NvdimmDsmIn {
+uint32_t handle;
+uint32_t revision;
+uint32_t function;
+   /* the remaining size in the page is used by arg3. */
+uint8_t arg3[0];
+} QEMU_PACKED;
+typedef struct NvdimmDsmIn NvdimmDsmIn;
+
  static uint64_t
  nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
  {
@@ -377,6 +386,7 @@ nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
  static void
  nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
  {
+fprintf(stderr, "BUG: we never write DSM notification IO Port.\n");

it doesn't seem like this hunk belongs here


Er, we have changed the logic:
- others:
  1) the buffer length is obtained directly from the IO read rather than
     from dsm memory
[ This has been documented in v5's changelog. ]

So, the IO write is replaced by IO read, nvdimm_dsm_write() should not be
triggered.




  }

  static const MemoryRegionOps nvdimm_dsm_ops = {
@@ -402,6 +412,179 @@ void nvdimm_init_acpi_state(MemoryRegion *memory, 
MemoryRegion *io,
  memory_region_add_subregion(io, NVDIMM_ACPI_IO_BASE, >io_mr);
  }

+#define BUILD_STA_METHOD(_dev_, _method_)  \
+do {   \
+_method_ = aml_method("_STA", 0);  \
+aml_append(_method_, aml_return(aml_int(0x0f)));   \
+aml_append(_dev_, _method_);   \
+} while (0)

_STA doesn't have any logic here so drop macro and just
replace its call sites with:


Okay, I was just wanting to save some code lines. I will drop this macro.



aml_append(foo_dev, aml_name_decl("_STA", aml_int(0xf));


_STA is required to be a method with zero arguments, but this statement just
defines an object. Is that okay?





+
+#define BUILD_DSM_METHOD(_dev_, _method_, _handle_, _uuid_)\
+do {   \
+Aml *ifctx, *uuid; \
+_method_ = aml_method("_DSM", 4);  \
+/* check UUID if it is we expect, return the errorcode if not.*/   \
+uuid = aml_touuid(_uuid_); \
+ifctx = aml_if(aml_lnot(aml_equal(aml_arg(0), uuid))); \
+aml_append(ifctx, aml_return(aml_int(1 /* Not Supported */))); \
+aml_append(method, ifctx); \
+aml_append(method, aml_return(aml_call4("NCAL", aml_int(_handle_), \
+   aml_arg(1), aml_arg(2), aml_arg(3;  \
+aml_append(_dev_, _method_);   \
+} while (0)
+
+#define BUILD_FIELD_UNIT_SIZE(_field_, _byte_, _name_) \
+aml_append(_field_, aml_named_field(_name_, (_byte_) * BITS_PER_BYTE))
+
+#define BUILD_FIELD_UNIT_STRUCT(_field_, _s_, _f_, _name_) \
+BUILD_FIELD_UNIT_SIZE(_field_, sizeof(typeof_field(_s_, _f_)), _name_)
+
+static void build_nvdimm_devices(GSList *device_list, Aml *root_dev)
+{
+for (; device_list; device_list = device_list->next) {
+NVDIMMDevice *nvdimm = device_list->data;
+int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP,
+   NULL);
+uint32_t handle = nvdimm_slot_to_handle(slot);
+Aml *dev, *method;
+
+dev = aml_device("NV%02X", slot);
+aml_append(dev, aml_name_decl("_ADR", aml_int(handle)));
+
+BUILD_STA_METHOD(dev, method);
+
+/*
+ * Chapter 4: _DSM Interface for NVDIMM Device (non-root) - Example
+ * in DSM Spec Rev1.
+ */
+BUILD_DSM_METHOD(dev, method,
+ handle /* NVDIMM Device Handle */,
+ "4309AC30-0D11-11E4-9191-0800200C9A66"
+ /* UUID for NVDIMM Devices. */);

this will add 

Re: [PATCH v7 09/35] exec: allow file_ram_alloc to work on file

2015-11-03 Thread Xiao Guangrong



On 11/03/2015 09:55 PM, Paolo Bonzini wrote:



On 03/11/2015 04:56, Xiao Guangrong wrote:



On 11/03/2015 05:12 AM, Paolo Bonzini wrote:



On 02/11/2015 10:13, Xiao Guangrong wrote:

Currently, file_ram_alloc() only works on a directory - it creates a file
under @path and does mmap on it

This patch tries to allow it to work on a file directly: if @path is a
directory it works as before, otherwise it treats @path as the target
file and then directly allocates memory from it

Signed-off-by: Xiao Guangrong 
---
   exec.c | 80
++
   1 file changed, 51 insertions(+), 29 deletions(-)

diff --git a/exec.c b/exec.c
index 9075f4d..db0fdaf 100644
--- a/exec.c
+++ b/exec.c
@@ -1174,14 +1174,60 @@ void qemu_mutex_unlock_ramlist(void)
   }

   #ifdef __linux__
+static bool path_is_dir(const char *path)
+{
+struct stat fs;
+
+return stat(path, ) == 0 && S_ISDIR(fs.st_mode);
+}
+
+static int open_ram_file_path(RAMBlock *block, const char *path,
size_t size)
+{
+char *filename;
+char *sanitized_name;
+char *c;
+int fd;
+
+if (!path_is_dir(path)) {
+int flags = (block->flags & RAM_SHARED) ? O_RDWR : O_RDONLY;
+
+flags |= O_EXCL;
+return open(path, flags);
+}
+
+/* Make name safe to use with mkstemp by replacing '/' with '_'. */
+sanitized_name = g_strdup(memory_region_name(block->mr));
+for (c = sanitized_name; *c != '\0'; c++) {
+if (*c == '/') {
+*c = '_';
+}
+}
+filename = g_strdup_printf("%s/qemu_back_mem.%s.XX", path,
+   sanitized_name);
+g_free(sanitized_name);
+fd = mkstemp(filename);
+if (fd >= 0) {
+unlink(filename);
+/*
+ * ftruncate is not supported by hugetlbfs in older
+ * hosts, so don't bother bailing out on errors.
+ * If anything goes wrong with it under other filesystems,
+ * mmap will fail.
+ */
+if (ftruncate(fd, size)) {
+perror("ftruncate");
+}
+}
+g_free(filename);
+
+return fd;
+}
+
   static void *file_ram_alloc(RAMBlock *block,
   ram_addr_t memory,
   const char *path,
   Error **errp)
   {
-char *filename;
-char *sanitized_name;
-char *c;
   void *area;
   int fd;
   uint64_t pagesize;
@@ -1212,38 +1258,14 @@ static void *file_ram_alloc(RAMBlock *block,
   goto error;
   }

-/* Make name safe to use with mkstemp by replacing '/' with '_'. */
-sanitized_name = g_strdup(memory_region_name(block->mr));
-for (c = sanitized_name; *c != '\0'; c++) {
-if (*c == '/')
-*c = '_';
-}
-
-filename = g_strdup_printf("%s/qemu_back_mem.%s.XX", path,
-   sanitized_name);
-g_free(sanitized_name);
+memory = ROUND_UP(memory, pagesize);

-fd = mkstemp(filename);
+fd = open_ram_file_path(block, path, memory);
   if (fd < 0) {
   error_setg_errno(errp, errno,
"unable to create backing store for path
%s", path);
-g_free(filename);
   goto error;
   }
-unlink(filename);
-g_free(filename);
-
-memory = ROUND_UP(memory, pagesize);
-
-/*
- * ftruncate is not supported by hugetlbfs in older
- * hosts, so don't bother bailing out on errors.
- * If anything goes wrong with it under other filesystems,
- * mmap will fail.
- */
-if (ftruncate(fd, memory)) {
-perror("ftruncate");
-}

   area = qemu_ram_mmap(fd, memory, pagesize, block->flags &
RAM_SHARED);
   if (area == MAP_FAILED) {



I was going to send tomorrow a pull request for a similar patch,
"backends/hostmem-file: Allow to specify full pathname for backing file".

The main difference seems to be your usage of O_EXCL.  Can you explain
why you added it?


It's used if we pass a block device as an NVDIMM backend memory:
  O_EXCL can be used without O_CREAT if pathname refers to a block
device.  If the block device
  is in use by the system (e.g., mounted), open() fails with the error EBUSY


That makes sense, but I think it's better to be consistent with the
handling of block devices.  Block devices do not use O_EXCL when QEMU
opens them; I guess in principle it would also be possible to share a
single pmem backend between multiple guests.


Yup. Will make a separate patch to do this. :)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 9/9] kvm/x86: Hyper-V kvm exit

2015-11-03 Thread Andrey Smetanin



On 11/03/2015 04:28 PM, Paolo Bonzini wrote:



On 22/10/2015 18:10, Andrey Smetanin wrote:

A new vcpu exit is introduced to notify the userspace of the
changes in Hyper-V SynIC configuration triggered by guest writing to the
corresponding MSRs.

Changes v3:
* added KVM_EXIT_HYPERV types and structs notes into docs

Signed-off-by: Andrey Smetanin 
Reviewed-by: Roman Kagan 
Signed-off-by: Denis V. Lunev 
CC: Vitaly Kuznetsov 
CC: "K. Y. Srinivasan" 
CC: Gleb Natapov 
CC: Paolo Bonzini 
CC: Roman Kagan 

---
  Documentation/virtual/kvm/api.txt | 22 ++
  arch/x86/include/asm/kvm_host.h   |  1 +
  arch/x86/kvm/hyperv.c | 17 +
  arch/x86/kvm/x86.c|  6 ++
  include/linux/kvm_host.h  |  1 +
  include/uapi/linux/kvm.h  | 17 +
  6 files changed, 64 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 8710418..a6858eb 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3337,6 +3337,28 @@ the userspace IOAPIC should process the EOI and 
retrigger the interrupt if
  it is still asserted.  Vector is the LAPIC interrupt vector for which the
  EOI was received.

+   struct kvm_hyperv_exit {
+#define KVM_EXIT_HYPERV_SYNIC  1
+   __u32 type;
+   union {
+   struct {
+   __u32 msr;
+   __u64 control;
+   __u64 evt_page;
+   __u64 msg_page;
+   } synic;
+   } u;
+   };
+   /* KVM_EXIT_HYPERV */
+struct kvm_hyperv_exit hyperv;
+Indicates that the VCPU exits into userspace to process some tasks
+related to Hyper-V emulation.
+Valid values for 'type' are:
+   KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about
+Hyper-V SynIC state change. Notification is used to remap SynIC
+event/message pages and to enable/disable SynIC messages/events processing
+in userspace.
+
/* Fix the size of the union. */
char padding[256];
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8434f88..54c90d3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -392,6 +392,7 @@ struct kvm_vcpu_hv {
u64 hv_vapic;
s64 runtime_offset;
struct kvm_vcpu_hv_synic synic;
+   struct kvm_hyperv_exit exit;
  };

  struct kvm_vcpu_arch {
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 8ff71f3..9443920 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -129,6 +129,20 @@ static void kvm_hv_notify_acked_sint(struct kvm_vcpu 
*vcpu, u32 sint)
srcu_read_unlock(>irq_srcu, idx);
  }

+static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
+{
+   struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
+   struct kvm_vcpu_hv *hv_vcpu = >arch.hyperv;
+
+   hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
+   hv_vcpu->exit.u.synic.msr = msr;
+   hv_vcpu->exit.u.synic.control = synic->control;
+   hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
+   hv_vcpu->exit.u.synic.msg_page = synic->msg_page;
+
+   kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
+}
+
  static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
 u32 msr, u64 data, bool host)
  {
@@ -141,6 +155,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
switch (msr) {
case HV_X64_MSR_SCONTROL:
synic->control = data;
+   synic_exit(synic, msr);


Another note.  I am getting:

EAX= EBX= ECX= EDX=0663
ESI= EDI= EBP= ESP=
EIP=fff0 EFL=0002 [---] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =   9300
CS =f000   9b00
SS =   9300
DS =   9300
FS =   9300
GS =   9300
LDT=   8200
TR =   8b00
GDT=  
IDT=  
CR0=6010 CR2= CR3= CR4=
DR0= DR1= DR2=
DR3=
DR6=0ff0 DR7=0400
EFER=
Code=90 90 90 90 eb c3 90 90 90 90 90 90 00 00 00 00 56 54 46 00 <90> 90
eb ac 90 90 90 90 90 90 90 90 90 90 90 90 00 00 00 00 00 00 00 00 00 00
00 00 00 00

if I run a patched QEMU but I *do not* enable the synthetic interrupt
controller.  I can fix it by wrapping the calls to synic_exit with 

Re: [PATCH v3 0/3] virtio DMA API core stuff

2015-11-03 Thread Paolo Bonzini


On 29/10/2015 10:01, Michael S. Tsirkin wrote:
> > Everyone seems to agree that x86's emulated Q35 thing
> > is just buggy right now and should be taught to use the existing ACPI
> > mechanism for enumerating passthrough devices.
> 
> I'm not sure what ACPI has to do with it.
> It's about a way for guest users to specify whether
> they want to bypass an IOMMU for a given device.

It's not configured in the guest, it's configured _when starting_ the
guest (e.g. -device some-pci-device,iommu-bypass=on) and it is reflected
in the DMAR table or the device tree.

The default for virtio and VFIO is to bypass the IOMMU.  Changing the
default can be supported (virtio) or not (VFIO, vhost-user).  Hotplug
needs to check whether the parent bridge has the same setting that the
user desires for the new device.

> 1. virtio ignores the iommu
> 2. vhost user ignores the iommu
> 3. dataplane ignores the iommu
> 4. vhost-net ignores the iommu
> 5. VFIO ignores the iommu
> 
> I think so far I only saw patches for 1 above.

1 and 3 are easy.  For 2 and 5 you can simply forbid configurations with
vhost-user/VFIO behind an IOMMU.  For 4 QEMU can simply not activate
vhost-net and use the userspace fallback.

However, IOMMU support in QEMU is experimental.  We can do things a step
at a time.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM/arm: kernel low level debug support for ARM32 virtual platforms

2015-11-03 Thread Christopher Covington
Hi Mario,

On 11/02/2015 06:51 PM, Mario Smarduch wrote:
> Hello,
>    this is a re-post from a couple of weeks ago; please take time to review this 
> simple patch which simplifies DEBUG_LL and prevents kernel crash on virtual 
> platforms.
> 
> Before this patch DEBUG_LL for 'dummy virtual machine':
> 
> ( ) Kernel low-level debugging via EmbeddedICE DCC channel
> ( ) Kernel low-level debug output via semihosting I/O
> ( ) Kernel low-level debugging via 8250 UART
> ( ) Kernel low-level debugging via ARM Ltd PL01x Primecell
> 
> In summary if debug uart is not emulated kernel crashes.
> And once you pass that hurdle, uart physical/virtual addresses are unknown.
> DEBUG_LL comes in handy on many occasions and should be somewhat 
> intuitive to use like it is for physical platforms. For virtual platforms
> user may start doubting the host and get into a bigger mess.
> 
> After this patch is applied user gets:
> 
> (X) Kernel low-level debugging on QEMU Virtual Platform
> ( ) Kernel low-level debugging on Kvmtool Virtual Platform
>   . above repeated 
> 
> The virtual addresses selected follow arm reference models, high in vmalloc 
> section with high mem enabled and guest running with >= 1GB of memory. The 
> offset is leftover from arm reference models.

Which model? It doesn't appear to match the vexpress AEM/RTSM/FVP/whatever
which used 0x1c09 for UART0.

> The patch is against 4.2.0-rc2 commit 43297dda0a51
> 
> Original Description
> 
> When booting a VM using QEMU or Kvmtool there are no clear ways to 
> enable low level debugging for these virtual platforms. some menu port 
> choices are not supported by the virtual platforms at all. And there is no
> help on the location of physical and virtual addresses for the ports.
> This may lead to wrong debug port and a frozen VM with a blank screen.
> 
> This patch adds menu selections for QEMU and Kvmtool virtual platforms for 
> low 
> level kernel print debugging. Help section displays port physical and
> virtual addresses.
> 
> ARM reference models use the MIDR register to run-time select UART port 
> address 
> (for ARCH_VEXPRESS) based on A9 or A15 part numbers. Looked for a similar 
> approach
> but couldn't find a way to differentiate between virtual platforms, something
> like a platform register.
> 
> Acked-by: Christoffer Dall 
> Signed-off-by: Mario Smarduch 
> ---
>  arch/arm/Kconfig.debug | 22 ++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
> index a2e16f9..d126bd4 100644
> --- a/arch/arm/Kconfig.debug
> +++ b/arch/arm/Kconfig.debug
> @@ -1155,6 +1155,28 @@ choice
> This option selects UART0 on VIA/Wondermedia System-on-a-chip
> devices, including VT8500, WM8505, WM8650 and WM8850.
>  
> + config DEBUG_VIRT_UART_QEMU
> + bool "Kernel low-level debugging on QEMU Virtual Platform"
> + depends on ARCH_VIRT
> + select DEBUG_UART_PL01X
> + help
> +   Say Y here if you want the debug print routines to direct
> +   their output to PL011 UART port on QEMU Virtual Platform.
> +   Appropriate address values are:
> + PHYSVIRT
> + 0x900   0xf809

I thought the only guarantee the virt machine had about the memory map was
that it would be described in the device tree.

> + config DEBUG_VIRT_UART_KVMTOOL
> + bool "Kernel low-level debugging on Kvmtool Virtual Platform"
> + depends on ARCH_VIRT
> + select DEBUG_UART_8250
> + help
> +   Say Y here if you want the debug print routines to direct
> +   their output to 8250 UART port on Kvmtool Virtual
> +   Platform. Appropriate address values are:
> + PHYSVIRT
> + 0x3f8   0xf80903f8
> +
>   config DEBUG_ICEDCC
>   bool "Kernel low-level debugging via EmbeddedICE DCC channel"
>   help
> 

Regards,
Christopher Covington

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html